target_dir <- "../../ppcc_iflu_data/read_data"
# Reading the data back in
s_exp1 <- read.csv(file.path(target_dir, "s_exp1.csv"))
s_exp1_et <- read.csv(file.path(target_dir, "s_exp1_et.csv"))
digit_span <- read.csv(file.path(target_dir, "digit_span.csv"))
battery <- read.csv(file.path(target_dir, "battery.csv"))
lextale <- read.csv(file.path(target_dir, "lextale.csv"))
italian_lextale <- read.csv(file.path(target_dir, "italian_lextale.csv"))
autism <- read.csv(file.path(target_dir, "autism.csv"))
acoustic_data <- read.csv(file.path(target_dir, "acoustic_data.csv"))
removal <- read.csv(file.path(target_dir, "removal.csv"))
#battery tasks
conflicts_prefer(dplyr::filter)
## [conflicted] Will prefer dplyr::filter over any other package.
battery_tidy<-battery%>%
select(participant.private.id,tree.node.key,task.name,screen.name,screen.number,display,reaction.time,response,sound1,sound2,base1,base2,same)%>%
filter(display=="block" &screen.name == "Screen 3")%>%
mutate(responses=case_when(response == "'z'"~1,
response == "'m'"~0),
correct=if_else(responses==same,1,0),
difference=abs(base1-base2))
battery_tidy%>%ggplot(aes(x=reaction.time))+
geom_histogram()+
facet_wrap(participant.private.id~.)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
mad_standard=3
battery_cleaned<-battery_tidy%>%
#group_by(participant.private.id)%>%
dplyr::filter(reaction.time>200)%>%
mutate(log_rt=sqrt(reaction.time))%>%
mutate(display_medians = median(log_rt),
abs_distance = abs(display_medians-log_rt),
mean_distance = mean(abs_distance),
upper_mad = display_medians+(mean_distance*mad_standard),
lower_mad = display_medians-(mean_distance*mad_standard))%>%
group_by(participant.private.id)%>%
mutate(participants_medians = median(log_rt))%>%
dplyr::filter(log_rt<upper_mad &log_rt>lower_mad)
removal[nrow(removal) + 1, ] <- c("battery removal",
nrow(battery_tidy) - nrow(battery_cleaned))
battery_cleaned%>%ggplot(aes(x=log_rt))+
geom_histogram()+
geom_point(aes(x=upper_mad,y=0),color="red")+
geom_point(aes(x=lower_mad,y=0),color="blue")+
facet_wrap(participant.private.id~.)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
battery_cleaned%>%ggplot(aes(x=factor(participant.private.id),y=participants_medians))+
geom_point()+
geom_point(aes(y=upper_mad,x=factor(participant.private.id)),color="red")+
geom_point(aes(y=lower_mad,x=factor(participant.private.id)),color="blue")
battery_cleaned_part_agg<-battery_cleaned%>%
group_by(participant.private.id)%>%
summarise(score=mean(correct))%>%
ungroup()%>%
mutate(medians = median(score),
abs_distance = abs(medians-score),
mean_distance = mean(abs_distance),
upper_mad = medians+(mean_distance*mad_standard),
lower_mad = medians-(mean_distance*mad_standard))
battery_cleaned_part_agg%>%
ggplot(aes(x=factor(participant.private.id),y=score))+
geom_point()+
geom_point(aes(x=factor(participant.private.id),y=upper_mad),color="red")+
geom_point(aes(x=factor(participant.private.id),y=lower_mad),color="red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
battery_cleaned_part_agg_keep<-battery_cleaned_part_agg%>%
filter(score<upper_mad & score>lower_mad)
battery_cleaned_item_agg<-battery_cleaned%>%
group_by(difference,participant.private.id)%>%
summarise(score=mean(correct))%>%
ungroup()%>%
mutate(medians = median(score),
abs_distance = abs(medians-score),
mean_distance = mean(abs_distance),
upper_mad = medians+(mean_distance*mad_standard),
lower_mad = medians-(mean_distance*mad_standard))
## `summarise()` has grouped output by 'difference'. You can override using the
## `.groups` argument.
battery_cleaned_item_agg%>%
ggplot(aes(x=factor(difference),y=score))+
geom_boxplot()+
geom_point()+
geom_point(aes(x=factor(difference),y=upper_mad),color="red")+
geom_point(aes(x=factor(difference),y=lower_mad),color="red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
battery_agg2<-battery_cleaned%>%
group_by(participant.private.id, task.name) %>%
summarize(score=mean(correct))
## `summarise()` has grouped output by 'participant.private.id'. You can override
## using the `.groups` argument.
battery_agg2%>%ggplot(aes(x=task.name,y=score,color=factor(task.name)))+
geom_boxplot()+
geom_jitter()+
theme_minimal()
battery_agg<-battery_cleaned%>%
mutate(
hit = if_else(same == 1 & responses == 1L, 1L, 0L),
f_a = if_else(same == 0 & responses == 1L, 1L, 0L),
miss = if_else(same == 0 & responses == 0L, 1L, 0L),
c_r = if_else(same == 1 & responses == 0L, 1L, 0L))%>%
group_by(participant.private.id, task.name) %>%
summarize(
score=mean(correct)+.5,
hit_count = sum(hit)+.5,
f_a_count = sum(f_a)+.5,
miss_count = sum(miss)+.5,
c_r_count = sum(c_r)+.5,
.groups = 'drop')%>%
mutate(
dprime = dprime(hit_count,
f_a_count,
hit_count + miss_count,
f_a_count + c_r_count)$dprime,
c = dprime(hit_count,
f_a_count,
hit_count + miss_count,
f_a_count + c_r_count)$c)%>%
select(participant.private.id, task.name, dprime, c)
battery_agg%>%ggplot(aes(x=factor(task.name),y=dprime,color=factor(task.name),alpha=.4))+
geom_line(aes(group=factor(participant.private.id)),color="black")+
geom_violin()+
geom_boxplot(width=.5)+
geom_point()+
theme_minimal()
battery_agg%>%ggplot(aes(x=c,y=dprime,color=factor(task.name)))+
geom_point()+
theme_minimal()+
facet_grid(.~factor(task.name))
battery_agg_wider<-battery_agg%>%
pivot_wider(names_from = task.name,values_from = c(dprime:c))
battery_agg_wider_simp<-battery_agg_wider
ggpairs(battery_agg_wider_simp%>%select(!participant.private.id))
#digit span
digit_tidy<-digit_span%>%
select(participant.private.id,tree.node.key,task.name,
target,response,length,correct.,total.correct)%>%
filter(correct.==1)%>%
group_by(participant.private.id)%>%
mutate(length=as.numeric(length))%>%
summarize(max_working_memory=max(length),
min_working_memory=min(length),
mean_working_memory=mean(length),
max_correct=max(total.correct))
digit_tidy%>%
ggplot(aes(x=1,y=max_correct,color=factor(max_working_memory)))+
geom_jitter()
digit_cleaned<-digit_tidy%>%
ungroup()%>%
mutate(display_medians = median(max_correct),
abs_distance = abs(display_medians-mean_working_memory),
mean_distance = mean(abs_distance),
upper_mad = display_medians+(mean_distance*mad_standard),
lower_mad = display_medians-(mean_distance*mad_standard))%>%
filter(max_correct<upper_mad &max_correct>lower_mad)
digit_cleaned%>%
ggplot(aes(x=mean_working_memory))+
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
digit_cleaned%>%
ggplot(aes(x=1,y=mean_working_memory,color=factor(max_working_memory)))+
geom_jitter()
removal[nrow(removal) + 1, ] <- c("digit_span_participant",
nrow(digit_tidy) - nrow(digit_cleaned))
digit_cleaned_simp<-digit_cleaned%>%select(participant.private.id,mean_working_memory)
#lextale English
lextale_tidy<-lextale%>%
select(participant.private.id,screen.name,zone.type,reaction.time,correct)%>%
filter(zone.type=="response_keyboard")%>%
filter(reaction.time>250)
lextale_tidy%>%ggplot(aes(x=reaction.time))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
lextale_agg<-lextale_tidy%>%
group_by(participant.private.id)%>%
summarize(lextale_score=mean(correct))
lextale_agg_cleaned<-lextale_agg%>%
ungroup()%>%
mutate(medians = median(lextale_score),
abs_distance = abs(medians-lextale_score),
mean_distance = mean(abs_distance),
upper_mad = medians+(mean_distance*mad_standard),
lower_mad = medians-(mean_distance*mad_standard))%>%
filter(lextale_score<upper_mad &lextale_score>lower_mad)
lextale_agg_cleaned%>%ggplot(aes(x=lextale_score))+
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
removal[nrow(removal) + 1, ] <- c("lextale_agg_participant",
nrow(lextale_agg) - nrow(lextale_agg_cleaned))
lextale_agg_cleaned_simp<-lextale_agg_cleaned%>%select(lextale_score,participant.private.id)
#lextale italian
italian_lextale_tidy<-italian_lextale%>%
select(participant.private.id,screen.name,zone.type,reaction.time,correct)%>%
filter(zone.type=="response_keyboard")%>%
filter(reaction.time>250)
italian_lextale_tidy%>%ggplot(aes(x=reaction.time))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
italian_lextale_agg<-italian_lextale_tidy%>%
group_by(participant.private.id)%>%
summarize(italian_lextale_score=mean(correct))
italian_lextale_agg_cleaned<-italian_lextale_agg%>%
ungroup()%>%
mutate(medians = median(italian_lextale_score),
abs_distance = abs(medians-italian_lextale_score),
mean_distance = mean(abs_distance),
upper_mad = medians+(mean_distance*mad_standard),
lower_mad = medians-(mean_distance*mad_standard))%>%
filter(italian_lextale_score<upper_mad &italian_lextale_score>lower_mad)
italian_lextale_agg_cleaned%>%ggplot(aes(x=italian_lextale_score))+
geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
removal[nrow(removal) + 1, ] <- c("italian_lextale_agg_participant",
nrow(italian_lextale_agg) - nrow(italian_lextale_agg_cleaned))
italian_lextale_agg_cleaned_simp<-italian_lextale_agg_cleaned%>%select(italian_lextale_score,participant.private.id)
#ASD scores
autism_tidy<-autism%>%
select(participant.private.id,question.key,response)%>%
filter(str_detect(question.key, "quantised"))%>%
mutate(response=as.numeric(response))
agree<-c(1, 2, 4, 5, 6, 7, 9, 12, 13, 16, 18, 19, 20, 21,
22, 23, 26, 33, 35, 39, 41,42, 43, 45, 46)
disagree<-c(3, 8, 10, 11, 14, 15, 17, 24, 25, 27, 28, 29,
30, 31, 32,34, 36, 37, 38, 40, 44, 47, 48, 49, 50)
agr_key<-c("1","2")
dis_key<-c("3","4")
autism_cleaned<-autism_tidy%>%
mutate(key_response = str_replace_all(question.key, "AQ", ""))%>%
mutate(key_response = str_replace_all(key_response, "-quantised", ""))%>%
mutate(points=case_when(key_response%in%agree & response %in% agr_key~1,
key_response%in%agree & response %in% dis_key~0,
key_response%in%disagree & response %in% dis_key~1,
key_response%in%disagree & response %in% agr_key~0))%>%
group_by(participant.private.id)%>%
summarize(autism_score=mean(points))
autism_cleaned%>%ggplot(aes(x=autism_score))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#minor task clean up above #experiment 1 behaviorial data clean up
s_exp1_cleaned<-s_exp1%>%
select(participant.private.id,spreadsheet.row,response,real_1_distractor_0,zone.type,display,audio,
q1_image,q2_image,q3_image,q4_image,
id,grouping_id,sorting,audio)%>%
filter(zone.type=="response_button_image")%>%
mutate(audio=tolower(audio),
word_play = str_remove(str_extract(audio, "^[^_]+"), "\\.wav"),
word_play_start = substr(word_play, 1, 3),
stress_play = str_remove(str_extract(audio, "(?<=_)[^_]+"), "\\.wav"),
response_word = str_extract(response, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
response_thing = str_extract(response, "(?<=\\D)\\d{2}(?=[a-zA-Z]+\\.jpeg)"),
correct=if_else(response_word==word_play,1,0))%>%
mutate(stress_play=if_else(stress_play=="p","Penultimate","Anti-penultimate"),
q1_word= str_extract(q1_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
q2_word= str_extract(q2_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
q3_word= str_extract(q3_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
q4_word= str_extract(q4_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"))%>%
mutate(target_quantile=case_when(q1_word==word_play~1,
q2_word==word_play~2,
q3_word==word_play~3,
q4_word==word_play~4),
competitor_quantile = case_when(
q1_word != word_play & str_detect(q1_word,word_play_start) ~ 1,
q2_word != word_play & str_detect(q2_word,word_play_start) ~ 2,
q3_word != word_play & str_detect(q3_word,word_play_start) ~ 3,
q4_word != word_play & str_detect(q4_word,word_play_start) ~ 4))%>%
rowwise() %>%
mutate(distractor_1_quantile = {
possible_values <- setdiff(1:4, c(target_quantile, competitor_quantile))
sample(possible_values, 1)
})%>%
mutate(distractor_2_quantile = 10-sum(distractor_1_quantile,target_quantile,competitor_quantile))%>%
ungroup()
s_exp1_cleaned<-s_exp1_cleaned%>%
mutate(target = case_when(target_quantile == 1 ~ q1_word,
target_quantile == 2 ~ q2_word,
target_quantile == 3 ~ q3_word,
target_quantile == 4 ~ q4_word),
competitor = case_when(competitor_quantile == 1 ~ q1_word,
competitor_quantile == 2 ~ q2_word,
competitor_quantile == 3 ~ q3_word,
competitor_quantile == 4 ~ q4_word),
dist_1 = case_when(distractor_1_quantile == 1 ~ q1_word,
distractor_1_quantile == 2 ~ q2_word,
distractor_1_quantile == 3 ~ q3_word,
distractor_1_quantile == 4 ~ q4_word),
dist_2 = case_when(distractor_2_quantile == 1 ~ q1_word,
distractor_2_quantile == 2 ~ q2_word,
distractor_2_quantile == 3 ~ q3_word,
distractor_2_quantile == 4 ~ q4_word))
item_agg<-s_exp1_cleaned%>%group_by(word_play)%>%
summarise(score=mean(correct))%>%
ungroup()%>%
mutate(medians = median(score),
abs_distance = abs(medians-score),
mean_distance = mean(abs_distance),
upper_mad = medians+(mean_distance*mad_standard),
lower_mad = medians-(mean_distance*mad_standard))
item_agg%>%
ggplot(aes(x=factor(word_play),y=score))+
geom_point()+
geom_point(aes(x=factor(word_play),y=upper_mad),color="red")+
geom_point(aes(x=factor(word_play),y=lower_mad),color="red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
item_agg_keep<-item_agg%>%
filter(score<upper_mad & score>lower_mad)
removal[nrow(removal) + 1, ] <- c("exp1_item_removed",
length(unique(item_agg$word_play)) - length(unique(item_agg_keep$word_play)))
part_agg<-s_exp1_cleaned%>%group_by(participant.private.id)%>%
summarise(score=mean(correct))%>%
ungroup()%>%
mutate(medians = median(score),
abs_distance = abs(medians-score),
mean_distance = mean(abs_distance),
upper_mad = medians+(mean_distance*mad_standard),
lower_mad = medians-(mean_distance*mad_standard))
part_agg%>%
ggplot(aes(x=factor(participant.private.id),y=score))+
geom_point()+
geom_point(aes(x=factor(participant.private.id),y=upper_mad),color="red")+
geom_point(aes(x=factor(participant.private.id),y=lower_mad),color="red")+
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
part_agg_keep<-part_agg%>%
filter(score<upper_mad & score>lower_mad)
removal[nrow(removal) + 1, ] <- c("exp1_part_removed",
length(unique(part_agg$participant.private.id)) - length(unique(part_agg_keep$participant.private.id)))
#s_exp1_cleaned<-s_exp1_cleaned%>%filter(participant.private.id%in%part_agg_keep$participant.private.id)
#s_exp1_cleaned<-s_exp1_cleaned%>%filter(word_play%in%item_agg_keep$word_play)
length(unique(s_exp1_cleaned$participant.private.id))
## [1] 50
length(unique(s_exp1_cleaned$word_play))
## [1] 128
list_o_word<-c('canapa','candido','celebre','codice','comico','eremo','estero','federa','forbice','fragola','impeto','lattice','loculo','macabro','mastice','missile','monito','panico','protesi','remora','salice','senape','tonaca','zigomo','canale','candito','celeste','codino','comizio','erede','esteta','fedele','forbito','fragore','impero','lattina','locusta','macaco','mastino','missiva','monile','panino','protesta','remoto','saliva','senato','tonale','zigote')
s_exp1_cleaned<-s_exp1_cleaned%>%
filter(correct==1)%>%
filter(word_play%in%list_o_word)
prac<-s_exp1_cleaned%>%ungroup()%>%group_by(real_1_distractor_0,stress_play,word_play)%>%count()
#exp 1 eye-tracking data clean up
s_exp1_et_cleaned<-s_exp1_et%>%
select(participant_id,spreadsheet_row,type,time_stamp,
time_elapsed,x_pred_normalised,y_pred_normalised,face_conf)%>%
mutate(participant.private.id=participant_id,
spreadsheet.row=spreadsheet_row)%>%
select(!c(participant_id,spreadsheet_row))%>%
filter(type=="prediction")
origin<-c(0,0)
s_exp1_data_all <- s_exp1_et_cleaned %>%
left_join(s_exp1_cleaned)%>%
na.omit()%>%
filter(face_conf>.5)%>%
mutate(x_pred_normalised=x_pred_normalised-.5,
y_pred_normalised=y_pred_normalised-.5)%>%
filter(x_pred_normalised<1&x_pred_normalised>-1)%>%
filter(y_pred_normalised<1&y_pred_normalised>-1)%>%
mutate(current_quadrant = case_when(
x_pred_normalised < origin[1] & y_pred_normalised > origin[2] ~ q1_word,
x_pred_normalised > origin[1] & y_pred_normalised > origin[2] ~ q2_word,
x_pred_normalised < origin[1] & y_pred_normalised < origin[2] ~ q3_word,
x_pred_normalised > origin[1] & y_pred_normalised < origin[2] ~ q4_word))%>%
mutate(target_looks=if_else(current_quadrant==target,1,0),
competitor_looks=if_else(current_quadrant==competitor,1,0),
dist1_looks=if_else(current_quadrant==dist_1,1,0),
dist2_looks=if_else(current_quadrant==dist_2,1,0))%>%
group_by(participant.private.id,spreadsheet.row,stress_play)%>%
mutate(time=time_stamp-min(time_stamp),
time_rounded = round(time / 50) * 50)%>%
mutate(time_rounded=time_rounded-1000)%>%
mutate(time_rounded=time_rounded-200)%>%
filter(time_rounded<1000&time_rounded>=0)
## Joining with `by = join_by(participant.private.id, spreadsheet.row)`
ets_exp1_all_agg<-s_exp1_data_all%>%
group_by(stress_play,time_rounded)%>%
summarise(mean_target_looks=mean(target_looks),
mean_competitor_looks=mean(competitor_looks),
mean_dist1_looks=mean(dist1_looks),
mean_dist2_looks=mean(dist2_looks))%>%
ungroup()%>%
pivot_longer(cols=c(mean_target_looks:mean_dist2_looks),names_to = "names",values_to="values")
## `summarise()` has grouped output by 'stress_play'. You can override using the
## `.groups` argument.
penn_times<-c(300, 500, 650)
anti_times<-c(390, 530, 625)
df_1 <- data.frame(
time_rounded = penn_times,
values = 0.6,
labels = c("ca", "LA", "ta")
)
df_2 <- data.frame(
time_rounded = anti_times,
values = 0.6,
labels = c("CA", "la", "mo")
)
plot1<-ets_exp1_all_agg %>%
filter(stress_play == "Penultimate") %>%
ggplot(aes()) +
geom_text(data=df_1,aes(label=labels,x = time_rounded, y = values),
family = "Arial", size = 4, fontface = "bold")+
geom_point(aes(x = time_rounded, y = values, color = names)) +
geom_smooth(aes(x = time_rounded, y = values, color = names),se=FALSE)+
scale_y_continuous(breaks=seq(0, 1, by = .25),
labels=seq(0, 1, by = .25))+
scale_x_continuous(
breaks = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE),
max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
labels = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE),
max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
minor_breaks = c(0,200,201, 396, 566, 699,800),
position = "bottom")+
theme_minimal() +
theme(
axis.text.x.top = element_text(color = "black"),
axis.text.x = element_text(color = "black"),
panel.grid.major.x = element_line(color = "grey", size = 0.1),
panel.grid.minor.x = element_line(linetype = "dotted", color = "black", size = 0.5),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_line(color = "grey", size = 0.1),
legend.position = c(0.1, 0.9),
legend.justification = c(0, 1),
legend.background = element_rect(fill = "white", colour = "black"))+
labs(x = NULL,y= "") +
guides(color = guide_legend(title = "Looks to:"))+
scale_color_manual(
values = c("mean_target_looks" = "#c41230", "mean_competitor_looks" = "#007bc0",
"mean_dist1_looks" = "#A7A7A9", "mean_dist2_looks" = "#000000"),
labels = c("Competitor", "Distractor", "Distractor","Target")
)
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot2<-ets_exp1_all_agg %>%
filter(stress_play == "Anti-penultimate") %>%
ggplot(aes()) +
geom_text(data=df_2,aes(label=labels,x = time_rounded, y = values),
family = "Arial", size = 4, fontface = "bold")+
geom_point(aes(x = time_rounded, y = values, color = names)) +
geom_smooth(aes(x = time_rounded, y = values, color = names),se=FALSE)+
scale_y_continuous(breaks=seq(0, 1, by = .25),
labels=seq(0, 1, by = .25))+
scale_x_continuous(
breaks = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE),
max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
labels = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE),
max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
minor_breaks = c(0,200,201, 499, 566, 669,800),
position = "bottom")+
theme_minimal() +
theme(
axis.text.x.top = element_text(color = "black"),
axis.text.x = element_blank(),
panel.grid.major.x = element_line(color = "grey", size = 0.1),
panel.grid.minor.x = element_line(linetype = "dotted", color = "black", size = 0.5),
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_line(color = "grey", size = 0.1),
legend.position = "none",
legend.justification = c(0, 1),
legend.background = element_rect(fill = "white", colour = "black"))+
labs(x = NULL,y= "") +
guides(color = guide_legend(title = "Looks to:"))+
scale_color_manual(
values = c("mean_target_looks" = "#c41230", "mean_competitor_looks" = "#007bc0",
"mean_dist1_looks" = "#A7A7A9", "mean_dist2_looks" = "#000000"),
labels = c("Competitor", "Distractor", "Distractor","Target")
)
combined_plot <- plot_grid(plot1, plot2, ncol = 1)+
draw_label("Anti-penultimate (bottom) and penultimate (top) items proportion of looks", x = 0.02, y = 0.5, angle = 90, size = 14)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
combined_plot
ggsave("../visuals/pen_vs_anti_pen.jpeg", plot = combined_plot, width = 6, height = 8, dpi = 300)
penn_times<-c(396, 566,699)
anti_times<-c(499, 566,669)
comp_targ<-s_exp1_data_all%>%
ungroup()%>%
filter(time_rounded<699)%>%
mutate(syllable=case_when(time_rounded<200~0,
stress_play=="Penultimate" & time_rounded>200 &
time_rounded<penn_times[1]~1,
stress_play=="Penultimate" & time_rounded>=penn_times[1] &
time_rounded<mean(penn_times[2],penn_times[3]) ~2,
stress_play=="Penultimate" & time_rounded<=penn_times[3]~3,
stress_play=="Anti-penultimate" & time_rounded>200 &
time_rounded<anti_times[1]~1,
stress_play=="Anti-penultimate" & time_rounded>=anti_times[1] &
time_rounded<mean(anti_times[2],anti_times[3]) ~2,
stress_play=="Anti-penultimate" & time_rounded<=anti_times[3]~3))
comp_tar_longer<-comp_targ%>%
pivot_longer(cols=c(target_looks:dist2_looks),names_to = "names",values_to="values")%>%
mutate(syllable=as.factor(syllable),
stress_play=as.factor(stress_play))
contrasts(comp_tar_longer$stress_play)<-c(.5,-.5)
contrasts(comp_tar_longer$stress_play)
## [,1]
## Anti-penultimate 0.5
## Penultimate -0.5
comp_tar_longer$names<-as.factor(comp_tar_longer$names)
contrasts(comp_tar_longer$names)
## dist1_looks dist2_looks target_looks
## competitor_looks 0 0 0
## dist1_looks 1 0 0
## dist2_looks 0 1 0
## target_looks 0 0 1
#data$looks <- factor(data$looks, levels = c("dist1_looks", "dist2_looks", "target_looks", "competitor_looks"))
contrasts(factor(comp_tar_longer$names))
## dist1_looks dist2_looks target_looks
## competitor_looks 0 0 0
## dist1_looks 1 0 0
## dist2_looks 0 1 0
## target_looks 0 0 1
contrasts(comp_tar_longer$stress_play)
## [,1]
## Anti-penultimate 0.5
## Penultimate -0.5
levels(comp_tar_longer$stress_play)<-c("Anti-penultimate","Penultimate")
levels(comp_tar_longer$stress_play)
## [1] "Anti-penultimate" "Penultimate"
comp_tar_model<-glm(values~names*stress_play*syllable,data=comp_tar_longer,family = "binomial")
comp_tar_model1<-glm(values~stress_play*names+syllable,data=comp_tar_longer,family = "binomial")
comp_tar_model2<-glm(values~stress_play+names*syllable,data=comp_tar_longer,family = "binomial")
comp_tar_model3<-glm(values~stress_play+names+syllable,data=comp_tar_longer,family = "binomial")
summary(comp_tar_model)
##
## Call:
## glm(formula = values ~ names * stress_play * syllable, family = "binomial",
## data = comp_tar_longer)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9914 -0.7900 -0.6976 -0.1046 1.9024
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.988512 0.025298 -39.074 < 2e-16
## namesdist1_looks -0.111691 0.036266 -3.080 0.00207
## namesdist2_looks -0.169471 0.036565 -4.635 3.57e-06
## namestarget_looks -0.165842 0.036528 -4.540 5.62e-06
## stress_play1 -0.032196 0.050597 -0.636 0.52457
## syllable1 0.088697 0.036242 2.447 0.01439
## syllable2 0.065407 0.040449 1.617 0.10588
## syllable3 -0.034831 0.039581 -0.880 0.37887
## namesdist1_looks:stress_play1 -0.010309 0.072532 -0.142 0.88698
## namesdist2_looks:stress_play1 0.182874 0.073129 2.501 0.01239
## namestarget_looks:stress_play1 -0.039573 0.073056 -0.542 0.58804
## namesdist1_looks:syllable1 -0.264449 0.052877 -5.001 5.70e-07
## namesdist2_looks:syllable1 -0.228770 0.053211 -4.299 1.71e-05
## namestarget_looks:syllable1 0.111961 0.051916 2.157 0.03104
## namesdist1_looks:syllable2 -0.576100 0.061398 -9.383 < 2e-16
## namesdist2_looks:syllable2 -0.419772 0.060773 -6.907 4.94e-12
## namestarget_looks:syllable2 0.550645 0.056570 9.734 < 2e-16
## namesdist1_looks:syllable3 -0.362953 0.058726 -6.180 6.39e-10
## namesdist2_looks:syllable3 -0.344982 0.059209 -5.826 5.66e-09
## namestarget_looks:syllable3 0.680661 0.055045 12.365 < 2e-16
## stress_play1:syllable1 0.064657 0.072483 0.892 0.37238
## stress_play1:syllable2 0.007727 0.080899 0.096 0.92390
## stress_play1:syllable3 0.009700 0.079162 0.123 0.90248
## namesdist1_looks:stress_play1:syllable1 -0.063136 0.105754 -0.597 0.55050
## namesdist2_looks:stress_play1:syllable1 -0.197715 0.106423 -1.858 0.06319
## namestarget_looks:stress_play1:syllable1 -0.006079 0.103831 -0.059 0.95332
## namesdist1_looks:stress_play1:syllable2 0.033787 0.122796 0.275 0.78320
## namesdist2_looks:stress_play1:syllable2 -0.108914 0.121546 -0.896 0.37021
## namestarget_looks:stress_play1:syllable2 0.054556 0.113140 0.482 0.62967
## namesdist1_looks:stress_play1:syllable3 0.049859 0.117452 0.425 0.67120
## namesdist2_looks:stress_play1:syllable3 0.025804 0.118419 0.218 0.82751
## namestarget_looks:stress_play1:syllable3 -0.045756 0.110090 -0.416 0.67769
##
## (Intercept) ***
## namesdist1_looks **
## namesdist2_looks ***
## namestarget_looks ***
## stress_play1
## syllable1 *
## syllable2
## syllable3
## namesdist1_looks:stress_play1
## namesdist2_looks:stress_play1 *
## namestarget_looks:stress_play1
## namesdist1_looks:syllable1 ***
## namesdist2_looks:syllable1 ***
## namestarget_looks:syllable1 *
## namesdist1_looks:syllable2 ***
## namesdist2_looks:syllable2 ***
## namestarget_looks:syllable2 ***
## namesdist1_looks:syllable3 ***
## namesdist2_looks:syllable3 ***
## namestarget_looks:syllable3 ***
## stress_play1:syllable1
## stress_play1:syllable2
## stress_play1:syllable3
## namesdist1_looks:stress_play1:syllable1
## namesdist2_looks:stress_play1:syllable1 .
## namestarget_looks:stress_play1:syllable1
## namesdist1_looks:stress_play1:syllable2
## namesdist2_looks:stress_play1:syllable2
## namestarget_looks:stress_play1:syllable2
## namesdist1_looks:stress_play1:syllable3
## namesdist2_looks:stress_play1:syllable3
## namestarget_looks:stress_play1:syllable3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 120704 on 107323 degrees of freedom
## Residual deviance: 118904 on 107292 degrees of freedom
## AIC: 118968
##
## Number of Fisher Scoring iterations: 4
summary(comp_tar_model1)
##
## Call:
## glm(formula = values ~ stress_play * names + syllable, family = "binomial",
## data = comp_tar_longer)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8745 -0.8084 -0.6884 -0.1173 1.8045
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.536e-01 1.749e-02 -54.531 < 2e-16 ***
## stress_play1 -5.629e-03 2.742e-02 -0.205 0.837364
## namesdist1_looks -3.767e-01 2.027e-02 -18.582 < 2e-16 ***
## namesdist2_looks -3.916e-01 2.033e-02 -19.259 < 2e-16 ***
## namestarget_looks 1.336e-01 1.901e-02 7.029 2.07e-12 ***
## syllable1 2.475e-15 1.862e-02 0.000 1.000000
## syllable2 -7.547e-15 2.045e-02 0.000 1.000000
## syllable3 4.123e-16 2.033e-02 0.000 1.000000
## stress_play1:namesdist1_looks 2.827e-02 4.055e-02 0.697 0.485625
## stress_play1:namesdist2_looks 1.342e-01 4.066e-02 3.301 0.000963 ***
## stress_play1:namestarget_looks -1.060e-01 3.802e-02 -2.789 0.005285 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 120704 on 107323 degrees of freedom
## Residual deviance: 119606 on 107313 degrees of freedom
## AIC: 119628
##
## Number of Fisher Scoring iterations: 4
summary(comp_tar_model2)
##
## Call:
## glm(formula = values ~ stress_play + names * syllable, family = "binomial",
## data = comp_tar_longer)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9705 -0.7954 -0.7001 -0.1027 1.8936
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.886e-01 2.530e-02 -39.079 < 2e-16 ***
## stress_play1 -2.565e-14 1.454e-02 0.000 1.00000
## namesdist1_looks -1.117e-01 3.626e-02 -3.080 0.00207 **
## namesdist2_looks -1.671e-01 3.653e-02 -4.576 4.75e-06 ***
## namestarget_looks -1.658e-01 3.652e-02 -4.538 5.67e-06 ***
## syllable1 9.316e-02 3.559e-02 2.617 0.00886 **
## syllable2 6.969e-02 3.905e-02 1.785 0.07430 .
## syllable3 -3.476e-02 3.958e-02 -0.878 0.37988
## namesdist1_looks:syllable1 -2.742e-01 5.190e-02 -5.283 1.27e-07 ***
## namesdist2_looks:syllable1 -2.331e-01 5.214e-02 -4.470 7.81e-06 ***
## namestarget_looks:syllable1 1.058e-01 5.096e-02 2.075 0.03796 *
## namesdist1_looks:syllable2 -5.801e-01 5.902e-02 -9.829 < 2e-16 ***
## namesdist2_looks:syllable2 -4.345e-01 5.856e-02 -7.420 1.17e-13 ***
## namestarget_looks:syllable2 5.480e-01 5.460e-02 10.037 < 2e-16 ***
## namesdist1_looks:syllable3 -3.629e-01 5.872e-02 -6.179 6.44e-10 ***
## namesdist2_looks:syllable3 -3.440e-01 5.913e-02 -5.818 5.95e-09 ***
## namestarget_looks:syllable3 6.807e-01 5.504e-02 12.368 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 120704 on 107323 degrees of freedom
## Residual deviance: 118928 on 107307 degrees of freedom
## AIC: 118962
##
## Number of Fisher Scoring iterations: 4
summary(comp_tar_model3)
##
## Call:
## glm(formula = values ~ stress_play + names + syllable, family = "binomial",
## data = comp_tar_longer)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.8543 -0.8074 -0.6850 -0.1259 1.7750
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.537e-01 1.749e-02 -54.537 < 2e-16 ***
## stress_play1 1.452e-14 1.450e-02 0.000 1
## namesdist1_looks -3.766e-01 2.027e-02 -18.574 < 2e-16 ***
## namesdist2_looks -3.897e-01 2.032e-02 -19.181 < 2e-16 ***
## namestarget_looks 1.337e-01 1.901e-02 7.034 2.01e-12 ***
## syllable1 -3.122e-15 1.862e-02 0.000 1
## syllable2 -1.352e-14 2.044e-02 0.000 1
## syllable3 -1.436e-15 2.032e-02 0.000 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 120704 on 107323 degrees of freedom
## Residual deviance: 119643 on 107316 degrees of freedom
## AIC: 119659
##
## Number of Fisher Scoring iterations: 4
anova(comp_tar_model,comp_tar_model1)
## Analysis of Deviance Table
##
## Model 1: values ~ names * stress_play * syllable
## Model 2: values ~ stress_play * names + syllable
## Resid. Df Resid. Dev Df Deviance
## 1 107292 118904
## 2 107313 119606 -21 -701.87
anova(comp_tar_model1,comp_tar_model2)
## Analysis of Deviance Table
##
## Model 1: values ~ stress_play * names + syllable
## Model 2: values ~ stress_play + names * syllable
## Resid. Df Resid. Dev Df Deviance
## 1 107313 119606
## 2 107307 118928 6 678.12
anova(comp_tar_model2,comp_tar_model3)
## Analysis of Deviance Table
##
## Model 1: values ~ stress_play + names * syllable
## Model 2: values ~ stress_play + names + syllable
## Resid. Df Resid. Dev Df Deviance
## 1 107307 118928
## 2 107316 119643 -9 -714.66
bic_model = BIC(comp_tar_model)
bic_model1 = BIC(comp_tar_model1)
bic_model2 = BIC(comp_tar_model2)
bic_model3 = BIC(comp_tar_model3)
bic_model
## [1] 119275.1
bic_model1
## [1] 119733.7
bic_model2
## [1] 119125.1
bic_model3
## [1] 119735.5
#analysis 1 (target vs competitor analysis)
library(broom)
tidy_model <- tidy(comp_tar_model2, effects = "fixed")%>%
mutate(
main = case_when(
str_detect(term, ":") ~ 1,
TRUE ~ 0))%>%
mutate(
looks = case_when(
str_detect(term, "target") ~ "target",
str_detect(term, "dist1") ~ "distractor_1",
str_detect(term, "dist2") ~ "distractor_2",
TRUE ~ "Base_looks"))%>%
mutate(
syllable = case_when(
str_detect(term, "syllable1") ~ "1",
str_detect(term, "syllable2") ~ "2",
str_detect(term, "syllable3") ~ "3",
TRUE ~ "0"))%>%
mutate(
term=str_replace(term, "syllable1","Syllable 1"),
term=str_replace(term, "syllable2","Syllable 1.5"),
term=str_replace(term, "syllable3","Syllable 2"),
term=str_replace(term, "_looks",""),
term=str_replace(term, "names","Looks "),
term=str_replace(term, "stress_play","Stress"),
term=str_replace(term, "Stress1","Stress Ante"))%>%
arrange(main, syllable, looks) %>%
mutate(
row_number = row_number(),
looks = as.factor(looks),
Significance = case_when(
p.value < 0.001 ~ "***",
p.value < 0.01 ~ "**",
p.value < 0.05 ~ "*",
TRUE ~ ""))
colors <- c("Base_looks" = "#007bc0", "distractor_1" = "#A7A7A9",
"distractor_2" = "#000000", "target" = "#c41230")
pen_anti_model<-ggplot(tidy_model, aes(x = estimate, y = row_number,color=factor(looks))) +
geom_point() +
geom_errorbarh(aes(xmin = estimate - 1.96 * std.error, xmax = estimate + 1.96 * std.error), height = 0) +
labs(title = , x = "Estimated Coefficients", y = "",color = "Looks to grouping") +
theme_minimal()+
geom_vline(xintercept = 0,color = "#007bc0")+
scale_y_continuous(breaks = tidy_model$row_number, labels = tidy_model$term)+
scale_color_manual(
values = c( "Base_looks" = "#007bc0", "distractor_1" = "#A7A7A9",
"distractor_2" = "#000000","target" = "#c41230"),
labels = c( "Base", "Distractor 1", "Distractor 2","Target")
)+
annotate("rect", xmin = -2.7, xmax = -1.45, ymin = -1.5, ymax = 17.5, fill = "white",color="grey") +
geom_text(aes(y = row_number,x=-1.5, label = term, color = factor(looks)),hjust = 1, size = 4,angle=40,show.legend = FALSE)+
xlim(-2.7, 1)+
coord_flip()+
theme(axis.text.y = element_text(angle = 15, hjust = 1),
axis.text.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank(),
axis.title.y = element_text(hjust = .75, vjust = 0.0),
axis.title.x = element_text(hjust = .0, vjust = 0),
panel.border = element_blank(),
legend.position = "bottom")+
scale_x_continuous(breaks=seq(-1, 1, by = .5))+
geom_text(aes(y = 1,x=-2.5, label = "Terms"),color = "black",hjust = 1, size = 5,angle=0,show.legend = FALSE)+
geom_text(aes(y = row_number, x = -1.2, label = Significance), hjust = 0, vjust = .5,angle=90, color = "black", show.legend = FALSE)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
pen_anti_model
## Warning in geom_text(aes(y = 1, x = -2.5, label = "Terms"), color = "black", : All aesthetics have length 1, but the data has 17 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.
#ggsave("../visuals/pen_vs_anti_pen_model.jpeg", plot = pen_anti_model, width = 10, height = 6.6, dpi = 300)
#analysis 2 (target distributional bias)
tar_longer<-comp_tar_longer%>%
filter(names=="target_looks")%>%
group_by(word_play,stress_play,syllable)%>%
summarize(tar_prop_o_looks=mean(values))%>%
filter(syllable==c(2,3))%>%ungroup()
## `summarise()` has grouped output by 'word_play', 'stress_play'. You can
## override using the `.groups` argument.
tar_ana1<-lme4::lmer(tar_prop_o_looks~stress_play*syllable+(1|word_play),data=tar_longer)
tar_ana2<-lme4::lmer(tar_prop_o_looks~stress_play+syllable+(1|word_play),data=tar_longer)
tar_ana3<-lme4::lmer(tar_prop_o_looks~syllable+(1|word_play),data=tar_longer)
tar_ana4<-lme4::lmer(tar_prop_o_looks~stress_play+(1|word_play),data=tar_longer)
summary(tar_ana1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ stress_play * syllable + (1 | word_play)
## Data: tar_longer
##
## REML criterion at convergence: -210.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.59089 -0.43531 0.00725 0.55208 1.60197
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.006061 0.07785
## Residual 0.001919 0.04380
## Number of obs: 96, groups: word_play, 48
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.368440 0.012893 28.576
## stress_play1 -0.004686 0.025787 -0.182
## syllable3 0.007875 0.008941 0.881
## stress_play1:syllable3 -0.021011 0.017882 -1.175
##
## Correlation of Fixed Effects:
## (Intr) strs_1 syllb3
## stress_ply1 0.000
## syllable3 -0.347 0.000
## strss_pl1:3 0.000 -0.347 0.000
summary(tar_ana2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ stress_play + syllable + (1 | word_play)
## Data: tar_longer
##
## REML criterion at convergence: -215
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.51246 -0.52886 -0.02352 0.52863 1.71578
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.006053 0.07780
## Residual 0.001934 0.04398
## Number of obs: 96, groups: word_play, 48
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.368440 0.012900 28.562
## stress_play1 -0.015191 0.024187 -0.628
## syllable3 0.007875 0.008977 0.877
##
## Correlation of Fixed Effects:
## (Intr) strs_1
## stress_ply1 0.000
## syllable3 -0.348 0.000
summary(tar_ana3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ syllable + (1 | word_play)
## Data: tar_longer
##
## REML criterion at convergence: -220.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.53523 -0.51694 -0.01162 0.55191 1.69300
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.005963 0.07722
## Residual 0.001934 0.04398
## Number of obs: 96, groups: word_play, 48
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.368440 0.012826 28.725
## syllable3 0.007875 0.008977 0.877
##
## Correlation of Fixed Effects:
## (Intr)
## syllable3 -0.350
summary(tar_ana4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ stress_play + (1 | word_play)
## Data: tar_longer
##
## REML criterion at convergence: -221.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.55896 -0.47448 0.04201 0.48256 1.62967
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.006058 0.07783
## Residual 0.001925 0.04387
## Number of obs: 96, groups: word_play, 48
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.37238 0.01209 30.792
## stress_play1 -0.01519 0.02419 -0.628
##
## Correlation of Fixed Effects:
## (Intr)
## stress_ply1 0.000
#no significant results here- no bias
########analysis 3 (acoustics)
acoustic_data<-read.csv("acoustic_data_ppcc.csv")
aco_agg<-acoustic_data%>%
group_by(stress,vowel)%>%
na.omit()%>%
summarize(mean_pitch=mean(mean_pitch),
mean_amplitude=mean(mean_amplitude),
mean_dur=mean(duration)*1000,
mean_tilt=mean(mean_tilt))
## `summarise()` has grouped output by 'stress'. You can override using the
## `.groups` argument.
acoustic_data<-acoustic_data%>%
mutate(
stress_play = ifelse(stress == "antipenultimate", "Anti-penultimate", "Penultimate"),
syllable = ifelse(vowel == "v1", 1, 3),
syllable=as.factor(syllable),
word=tolower(word))%>%
select(-X)
penn_accoustic_data<-acoustic_data%>%
filter(stress_play=="Penultimate")
ante_penn_accoustic_data<-acoustic_data%>%
filter(stress_play!="Penultimate")
levels(penn_accoustic_data$syllable)
## [1] "1" "3"
# Create a function to run t-tests for each predictor
run_t_tests <- function(data, predictors, group_var) {
results <- data.frame(Predictor = character(), T_value = numeric(), P_value = numeric(), Significance = character(), stringsAsFactors = FALSE)
for (predictor in predictors) {
cat("Running t-tests for predictor:", predictor, "\n")
formula <- as.formula(paste(predictor, "~", group_var))
t_test_result <- t.test(formula, data = data)
# Determine significance level
p_value <- t_test_result$p.value
significance <- if (p_value < 0.001) {
"***"
} else if (p_value < 0.01) {
"**"
} else if (p_value < 0.05) {
"*"
} else {
""
}
# Append results to dataframe
results <- rbind(results, data.frame(Predictor = predictor, T_value = t_test_result$statistic, P_value = p_value, Significance = significance))
}
return(results)
}
# Define the predictors and group variable
predictors <- c("mean_amplitude", "mean_pitch", "duration","mean_tilt")
group_var <- "syllable"
# Run t-tests
t_test_results_penn <- run_t_tests(penn_accoustic_data, predictors, group_var)
## Running t-tests for predictor: mean_amplitude
## Running t-tests for predictor: mean_pitch
## Running t-tests for predictor: duration
## Running t-tests for predictor: mean_tilt
t_test_results_ante_penn <- run_t_tests(ante_penn_accoustic_data, predictors, group_var)
## Running t-tests for predictor: mean_amplitude
## Running t-tests for predictor: mean_pitch
## Running t-tests for predictor: duration
## Running t-tests for predictor: mean_tilt
# Print the results
t_test_results_penn
## Predictor T_value P_value Significance
## t mean_amplitude 5.1787925 6.681214e-07 ***
## t1 mean_pitch 6.9926294 7.017645e-11 ***
## t2 duration -20.8351541 5.296237e-40 ***
## t3 mean_tilt 0.3321044 7.403028e-01
t_test_results_ante_penn
## Predictor T_value P_value Significance
## t mean_amplitude 7.507733 1.481562e-11 ***
## t1 mean_pitch 13.171503 2.130472e-22 ***
## t2 duration 13.111704 8.801271e-23 ***
## t3 mean_tilt 8.256081 2.160439e-13 ***
#analysis 3 target fixation analysis
ets_binary_analysis_syl_1<-comp_tar_longer%>%
pivot_wider(names_from = names,values_from = values)%>%
filter(syllable!=2)%>%
filter(syllable!=0)%>%
mutate(word=target)%>%
left_join(acoustic_data)%>%
filter(syllable==1)%>%
mutate(mean_pitch_scaled = scale(mean_pitch),
mean_amplitude_scaled = scale(mean_amplitude),
mean_tilt_scaled = scale(mean_tilt),
duration_scaled = scale(duration))
## Joining with `by = join_by(stress_play, syllable, word)`
ap_data_1<-ets_binary_analysis_syl_1%>%filter(stress_play!="Penultimate")
pp_data_1<-ets_binary_analysis_syl_1%>%filter(stress_play=="Penultimate")
mod_full<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
+(1|word),family=binomial, data= ap_data_1)
## boundary (singular) fit: see help('isSingular')
mod_full_2<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
+(1|word),family=binomial, data= pp_data_1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00802168 (tol = 0.002, component 1)
ets_binary_analysis_syl_3<-comp_tar_longer%>%
pivot_wider(names_from = names,values_from = values)%>%
filter(syllable!=2)%>%
filter(syllable!=0)%>%
mutate(word=target)%>%
left_join(acoustic_data)%>%
filter(syllable==3)%>%
mutate(mean_pitch_scaled = scale(mean_pitch),
mean_amplitude_scaled = scale(mean_amplitude),
mean_tilt_scaled = scale(mean_tilt),
duration_scaled = scale(duration))
## Joining with `by = join_by(stress_play, syllable, word)`
ap_data_3<-ets_binary_analysis_syl_3%>%filter(stress_play!="Penultimate")
pp_data_3<-ets_binary_analysis_syl_3%>%filter(stress_play=="Penultimate")
mod_full_1_3<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
+(1|word),family=binomial, data= ap_data_3)
## boundary (singular) fit: see help('isSingular')
mod_full_2_3<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
+(1|word),family=binomial, data= pp_data_3)
## boundary (singular) fit: see help('isSingular')
summary(mod_full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *
## duration_scaled + (1 | word)
## Data: ap_data_1
##
## AIC BIC logLik deviance df.resid
## 5508.4 5618.2 -2737.2 5474.4 4718
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.7721 -0.6542 -0.5760 1.3696 2.4321
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0 0
## Number of obs: 4735, groups: word, 23
##
## Fixed effects:
## Estimate
## (Intercept) -1.35736
## mean_pitch_scaled 0.05746
## mean_amplitude_scaled 0.11058
## mean_tilt_scaled 0.43284
## duration_scaled 0.27806
## mean_pitch_scaled:mean_amplitude_scaled 0.20027
## mean_pitch_scaled:mean_tilt_scaled 0.08976
## mean_amplitude_scaled:mean_tilt_scaled 0.65550
## mean_pitch_scaled:duration_scaled 0.10331
## mean_amplitude_scaled:duration_scaled -0.12135
## mean_tilt_scaled:duration_scaled -0.29873
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.21292
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 0.30921
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled -0.59884
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.89933
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.75100
## Std. Error
## (Intercept) 0.13196
## mean_pitch_scaled 0.14945
## mean_amplitude_scaled 0.22998
## mean_tilt_scaled 0.13597
## duration_scaled 0.09177
## mean_pitch_scaled:mean_amplitude_scaled 0.28133
## mean_pitch_scaled:mean_tilt_scaled 0.16733
## mean_amplitude_scaled:mean_tilt_scaled 0.17107
## mean_pitch_scaled:duration_scaled 0.10915
## mean_amplitude_scaled:duration_scaled 0.17742
## mean_tilt_scaled:duration_scaled 0.14827
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.13804
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 0.21837
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.15568
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.20104
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.26374
## z value
## (Intercept) -10.286
## mean_pitch_scaled 0.384
## mean_amplitude_scaled 0.481
## mean_tilt_scaled 3.183
## duration_scaled 3.030
## mean_pitch_scaled:mean_amplitude_scaled 0.712
## mean_pitch_scaled:mean_tilt_scaled 0.536
## mean_amplitude_scaled:mean_tilt_scaled 3.832
## mean_pitch_scaled:duration_scaled 0.947
## mean_amplitude_scaled:duration_scaled -0.684
## mean_tilt_scaled:duration_scaled -2.015
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 1.542
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 1.416
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled -3.847
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -4.473
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -2.847
## Pr(>|z|)
## (Intercept) < 2e-16
## mean_pitch_scaled 0.700645
## mean_amplitude_scaled 0.630659
## mean_tilt_scaled 0.001456
## duration_scaled 0.002446
## mean_pitch_scaled:mean_amplitude_scaled 0.476545
## mean_pitch_scaled:mean_tilt_scaled 0.591692
## mean_amplitude_scaled:mean_tilt_scaled 0.000127
## mean_pitch_scaled:duration_scaled 0.343882
## mean_amplitude_scaled:duration_scaled 0.494018
## mean_tilt_scaled:duration_scaled 0.043928
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.122957
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 0.156769
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.000120
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 7.7e-06
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.004407
##
## (Intercept) ***
## mean_pitch_scaled
## mean_amplitude_scaled
## mean_tilt_scaled **
## duration_scaled **
## mean_pitch_scaled:mean_amplitude_scaled
## mean_pitch_scaled:mean_tilt_scaled
## mean_amplitude_scaled:mean_tilt_scaled ***
## mean_pitch_scaled:duration_scaled
## mean_amplitude_scaled:duration_scaled
## mean_tilt_scaled:duration_scaled *
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled ***
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled ***
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
summary(mod_full_2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *
## duration_scaled + (1 | word)
## Data: pp_data_1
##
## AIC BIC logLik deviance df.resid
## 3358.3 3459.6 -1662.2 3324.3 2842
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.8893 -0.6752 -0.5326 1.2260 2.2371
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.01422 0.1193
## Number of obs: 2859, groups: word, 24
##
## Fixed effects:
## Estimate
## (Intercept) -1.20754
## mean_pitch_scaled 0.39125
## mean_amplitude_scaled -2.40163
## mean_tilt_scaled -0.05164
## duration_scaled -0.09770
## mean_pitch_scaled:mean_amplitude_scaled -2.49809
## mean_pitch_scaled:mean_tilt_scaled -0.15619
## mean_amplitude_scaled:mean_tilt_scaled -3.19909
## mean_pitch_scaled:duration_scaled 0.63661
## mean_amplitude_scaled:duration_scaled -2.45907
## mean_tilt_scaled:duration_scaled -0.08173
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled -1.30310
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled -2.53978
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled -0.06819
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -3.15979
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -1.28875
## Std. Error
## (Intercept) 0.74381
## mean_pitch_scaled 0.80779
## mean_amplitude_scaled 3.59167
## mean_tilt_scaled 0.94677
## duration_scaled 0.81161
## mean_pitch_scaled:mean_amplitude_scaled 3.67626
## mean_pitch_scaled:mean_tilt_scaled 1.04027
## mean_amplitude_scaled:mean_tilt_scaled 3.34152
## mean_pitch_scaled:duration_scaled 0.86995
## mean_amplitude_scaled:duration_scaled 3.71385
## mean_tilt_scaled:duration_scaled 0.88847
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 3.45882
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 3.73623
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.97625
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 3.23502
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 3.27218
## z value
## (Intercept) -1.623
## mean_pitch_scaled 0.484
## mean_amplitude_scaled -0.669
## mean_tilt_scaled -0.055
## duration_scaled -0.120
## mean_pitch_scaled:mean_amplitude_scaled -0.680
## mean_pitch_scaled:mean_tilt_scaled -0.150
## mean_amplitude_scaled:mean_tilt_scaled -0.957
## mean_pitch_scaled:duration_scaled 0.732
## mean_amplitude_scaled:duration_scaled -0.662
## mean_tilt_scaled:duration_scaled -0.092
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled -0.377
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled -0.680
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled -0.070
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.977
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.394
## Pr(>|z|)
## (Intercept) 0.104
## mean_pitch_scaled 0.628
## mean_amplitude_scaled 0.504
## mean_tilt_scaled 0.957
## duration_scaled 0.904
## mean_pitch_scaled:mean_amplitude_scaled 0.497
## mean_pitch_scaled:mean_tilt_scaled 0.881
## mean_amplitude_scaled:mean_tilt_scaled 0.338
## mean_pitch_scaled:duration_scaled 0.464
## mean_amplitude_scaled:duration_scaled 0.508
## mean_tilt_scaled:duration_scaled 0.927
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.706
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 0.497
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.944
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.329
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.694
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00802168 (tol = 0.002, component 1)
summary(mod_full_1_3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *
## duration_scaled + (1 | word)
## Data: ap_data_3
##
## AIC BIC logLik deviance df.resid
## 3480.8 3581.0 -1723.4 3446.8 2660
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.0057 -0.7770 -0.6576 1.2314 1.8784
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 2.595e-14 1.611e-07
## Number of obs: 2677, groups: word, 23
##
## Fixed effects:
## Estimate
## (Intercept) -6.6805
## mean_pitch_scaled -7.3133
## mean_amplitude_scaled -7.5312
## mean_tilt_scaled 6.8617
## duration_scaled -5.3622
## mean_pitch_scaled:mean_amplitude_scaled -7.9596
## mean_pitch_scaled:mean_tilt_scaled 7.7238
## mean_amplitude_scaled:mean_tilt_scaled -0.3461
## mean_pitch_scaled:duration_scaled -6.4490
## mean_amplitude_scaled:duration_scaled -7.2997
## mean_tilt_scaled:duration_scaled 4.8232
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.2661
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled -7.8219
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 5.4536
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -2.1295
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -1.8007
## Std. Error
## (Intercept) 2.0526
## mean_pitch_scaled 2.8747
## mean_amplitude_scaled 2.3077
## mean_tilt_scaled 7.2037
## duration_scaled 1.9872
## mean_pitch_scaled:mean_amplitude_scaled 2.3649
## mean_pitch_scaled:mean_tilt_scaled 8.6769
## mean_amplitude_scaled:mean_tilt_scaled 5.8816
## mean_pitch_scaled:duration_scaled 2.7610
## mean_amplitude_scaled:duration_scaled 2.2047
## mean_tilt_scaled:duration_scaled 6.6331
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 8.5534
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 2.3461
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 7.9323
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 5.3915
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 7.7251
## z value
## (Intercept) -3.255
## mean_pitch_scaled -2.544
## mean_amplitude_scaled -3.264
## mean_tilt_scaled 0.953
## duration_scaled -2.698
## mean_pitch_scaled:mean_amplitude_scaled -3.366
## mean_pitch_scaled:mean_tilt_scaled 0.890
## mean_amplitude_scaled:mean_tilt_scaled -0.059
## mean_pitch_scaled:duration_scaled -2.336
## mean_amplitude_scaled:duration_scaled -3.311
## mean_tilt_scaled:duration_scaled 0.727
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.031
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled -3.334
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.688
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.395
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.233
## Pr(>|z|)
## (Intercept) 0.001136
## mean_pitch_scaled 0.010959
## mean_amplitude_scaled 0.001100
## mean_tilt_scaled 0.340833
## duration_scaled 0.006968
## mean_pitch_scaled:mean_amplitude_scaled 0.000764
## mean_pitch_scaled:mean_tilt_scaled 0.373379
## mean_amplitude_scaled:mean_tilt_scaled 0.953073
## mean_pitch_scaled:duration_scaled 0.019506
## mean_amplitude_scaled:duration_scaled 0.000930
## mean_tilt_scaled:duration_scaled 0.467142
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.975177
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 0.000856
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.491754
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.692860
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.815682
##
## (Intercept) **
## mean_pitch_scaled *
## mean_amplitude_scaled **
## mean_tilt_scaled
## duration_scaled **
## mean_pitch_scaled:mean_amplitude_scaled ***
## mean_pitch_scaled:mean_tilt_scaled
## mean_amplitude_scaled:mean_tilt_scaled
## mean_pitch_scaled:duration_scaled *
## mean_amplitude_scaled:duration_scaled ***
## mean_tilt_scaled:duration_scaled
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled ***
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
summary(mod_full_2_3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *
## duration_scaled + (1 | word)
## Data: pp_data_3
##
## AIC BIC logLik deviance df.resid
## 3670.9 3771.6 -1818.4 3636.9 2744
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.0486 -0.8021 -0.6854 1.1248 1.6062
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 1.03e-13 3.21e-07
## Number of obs: 2761, groups: word, 24
##
## Fixed effects:
## Estimate
## (Intercept) -0.52557
## mean_pitch_scaled 0.23788
## mean_amplitude_scaled 0.25235
## mean_tilt_scaled 0.37625
## duration_scaled -0.06645
## mean_pitch_scaled:mean_amplitude_scaled -0.25924
## mean_pitch_scaled:mean_tilt_scaled -0.32735
## mean_amplitude_scaled:mean_tilt_scaled -0.08979
## mean_pitch_scaled:duration_scaled 0.10687
## mean_amplitude_scaled:duration_scaled -0.09348
## mean_tilt_scaled:duration_scaled -0.38639
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.39126
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled -0.25763
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.59576
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.09401
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.33147
## Std. Error
## (Intercept) 0.43540
## mean_pitch_scaled 0.28385
## mean_amplitude_scaled 0.51760
## mean_tilt_scaled 0.50162
## duration_scaled 0.34138
## mean_pitch_scaled:mean_amplitude_scaled 0.44601
## mean_pitch_scaled:mean_tilt_scaled 0.32803
## mean_amplitude_scaled:mean_tilt_scaled 0.48944
## mean_pitch_scaled:duration_scaled 0.21838
## mean_amplitude_scaled:duration_scaled 0.44712
## mean_tilt_scaled:duration_scaled 0.48212
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.34162
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 0.63138
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.54071
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.33543
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.31459
## z value
## (Intercept) -1.207
## mean_pitch_scaled 0.838
## mean_amplitude_scaled 0.488
## mean_tilt_scaled 0.750
## duration_scaled -0.195
## mean_pitch_scaled:mean_amplitude_scaled -0.581
## mean_pitch_scaled:mean_tilt_scaled -0.998
## mean_amplitude_scaled:mean_tilt_scaled -0.183
## mean_pitch_scaled:duration_scaled 0.489
## mean_amplitude_scaled:duration_scaled -0.209
## mean_tilt_scaled:duration_scaled -0.801
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 1.145
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled -0.408
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 1.102
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.280
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -1.054
## Pr(>|z|)
## (Intercept) 0.227
## mean_pitch_scaled 0.402
## mean_amplitude_scaled 0.626
## mean_tilt_scaled 0.453
## duration_scaled 0.846
## mean_pitch_scaled:mean_amplitude_scaled 0.561
## mean_pitch_scaled:mean_tilt_scaled 0.318
## mean_amplitude_scaled:mean_tilt_scaled 0.854
## mean_pitch_scaled:duration_scaled 0.625
## mean_amplitude_scaled:duration_scaled 0.834
## mean_tilt_scaled:duration_scaled 0.423
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled 0.252
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled 0.683
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled 0.271
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.779
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.292
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#competitor looks analyses
#
#comp_mod_full<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
# +(1|word),family=binomial, data= ap_data)
#comp_mod_full_2<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
# +(1|word),family=binomial, data= pp_data)
#comp_mod_full_1_3<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
# +(1|word),family=binomial, data= ap_data_3)
#comp_mod_full_2_3<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
# +(1|word),family=binomial, data= pp_data_3)
#summary(comp_mod_full)
#summary(comp_mod_full_2)
#summary(comp_mod_full_1_3)
#summary(comp_mod_full_2_3)
#individual differences for target and competitor fixations
comp_tar_longer_id<-comp_tar_longer%>%
left_join(battery_agg_wider_simp)%>%
left_join(digit_cleaned_simp)%>%
left_join(lextale_agg_cleaned_simp)%>%
left_join(italian_lextale_agg_cleaned_simp)%>%
left_join(autism_cleaned)%>%
filter(syllable!=0)%>%
filter(names=="target_looks")
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
comp_tar_longer_id_penn<-comp_tar_longer_id%>%filter(stress_play=="Penultimate")
comp_tar_longer_id_a_penn<-comp_tar_longer_id%>%filter(stress_play!="Penultimate")
lASSO Function
fit_models <- function(data, stress_filter) {
# Filter the data
data_filtered <- data %>% filter(stress_play == stress_filter)
data_filtered <- na.omit(data_filtered)
# Create the design matrix and response vector
x <- model.matrix(values ~ syllable * (dprime_battery_up_dur + dprime_battery_up_ptich + dprime_battery_up_risetime + dprime_battery_up_formants + lextale_score + autism_score + italian_lextale_score + lextale_score), data = data_filtered)[, -1]
y <- data_filtered$values
# Set up lambda values for LASSO regression
lambda_search_space <- 10^seq(10, -2, length = 100)
# Fit the LASSO regression model
lasso_model <- glmnet(x, y, alpha = 1, lambda = lambda_search_space, family = "binomial")
# Cross-validation to find the best lambda
cv <- cv.glmnet(x, y, alpha = 1, family = "binomial")
best_lambda <- cv$lambda.min
# Extract coefficients for the best lambda
coef_lasso_best <- coef(lasso_model, s = best_lambda)
# Convert coefficients to data frame for visualization
coef_df <- as.data.frame(as.matrix(coef_lasso_best))
coef_df$Predictor <- rownames(coef_df)
rownames(coef_df) <- NULL
# Visualize the coefficients
coef_plot <- ggplot(coef_df, aes(x = Predictor, y = s1)) +
geom_bar(stat = "identity") +
coord_flip() +
labs(title = paste("LASSO Regression Coefficients at Best Lambda (", round(best_lambda, 4), ")", sep = ""),
x = "Predictor",
y = "Coefficient Value") +
theme_minimal()
print(coef_plot)
# Select significant variables based on LASSO coefficients
significant_vars <- coef_df %>%
filter(s1 != 0) %>%
pull(Predictor)
# Clean up the variable names by replacing syllable1, syllable2, and syllable3 with syllable
significant_vars <- gsub("syllable[123]", "syllable", significant_vars)
# Remove duplicate variables
significant_vars <- unique(significant_vars)
# Prepare formula for GLMM using significant variables
significant_vars <- significant_vars[significant_vars != "(Intercept)"] # Remove intercept if present
formula_str <- paste("values ~", paste(significant_vars, collapse = " + "), "+ (1|word_play)")
# Convert the string to a formula object
formula <- as.formula(formula_str)
# Fit GLMM with selected variables
model_selected <- glmer(formula, data = data_filtered, family = "binomial")
# Print the summary of the GLMM
print(summary(model_selected))
# Return the summary and the plot
list(summary = summary(model_selected), plot = coef_plot)
}
comp_tar_longer_id <- comp_tar_longer %>%
left_join(battery_agg_wider_simp) %>%
left_join(digit_cleaned_simp) %>%
left_join(lextale_agg_cleaned_simp) %>%
left_join(italian_lextale_agg_cleaned_simp) %>%
left_join(autism_cleaned) %>%
filter(syllable != 0) %>%
filter(names == "target_looks")
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
# Fit models for Penultimate stress
summary_penultimate <- fit_models(comp_tar_longer_id, "Penultimate")
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0081802 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## values ~ lextale_score + italian_lextale_score + syllable:dprime_battery_up_dur +
## syllable:lextale_score + syllable:autism_score + syllable:italian_lextale_score +
## (1 | word_play)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 10866.7 10965.6 -5419.4 10838.7 8635
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3423 -0.7284 -0.5918 1.1495 2.3940
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.1483 0.3851
## Number of obs: 8649, groups: word_play, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.02711 1.22079 -4.118 3.82e-05 ***
## lextale_score -0.04220 0.38805 -0.109 0.913405
## italian_lextale_score 4.23226 1.27848 3.310 0.000932 ***
## syllable1:dprime_battery_up_dur -0.31647 0.46465 -0.681 0.495811
## syllable2:dprime_battery_up_dur -0.08322 0.38092 -0.218 0.827054
## syllable3:dprime_battery_up_dur -0.80535 0.43533 -1.850 0.064314 .
## lextale_score:syllable2 0.83900 0.50270 1.669 0.095119 .
## lextale_score:syllable3 0.78831 0.53056 1.486 0.137334
## syllable1:autism_score -0.22055 0.31420 -0.702 0.482711
## syllable2:autism_score 0.45215 0.25938 1.743 0.081296 .
## syllable3:autism_score 0.47694 0.29560 1.613 0.106645
## italian_lextale_score:syllable2 -0.52111 0.44830 -1.162 0.245072
## italian_lextale_score:syllable3 -0.47563 0.47568 -1.000 0.317364
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(summary(model_selected), correlation=TRUE) or
## vcov(summary(model_selected)) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0081802 (tol = 0.002, component 1)
# Fit models for non-Penultimate stress
summary_anti_penultimate <- fit_models(comp_tar_longer_id, "Anti-penultimate")
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00211687 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## values ~ syllable + dprime_battery_up_dur + lextale_score + syllable:dprime_battery_up_dur +
## syllable:dprime_battery_up_ptich + syllable:lextale_score +
## (1 | word_play)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 10870.6 10962.7 -5422.3 10844.6 8806
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3772 -0.7079 -0.5853 1.1847 2.6029
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.07606 0.2758
## Number of obs: 8819, groups: word_play, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5634 0.2436 -6.419 1.37e-10 ***
## syllable2 -0.4597 0.4323 -1.064 0.2875
## syllable3 -0.5004 0.3779 -1.324 0.1854
## dprime_battery_up_dur -1.7392 0.3613 -4.814 1.48e-06 ***
## lextale_score 0.6349 0.2927 2.169 0.0301 *
## syllable2:dprime_battery_up_dur -0.7630 0.6721 -1.135 0.2562
## syllable3:dprime_battery_up_dur -0.4225 0.5793 -0.729 0.4658
## syllable1:dprime_battery_up_ptich 0.8879 0.2138 4.154 3.27e-05 ***
## syllable2:dprime_battery_up_ptich 0.7265 0.3539 2.053 0.0401 *
## syllable3:dprime_battery_up_ptich 0.4800 0.2869 1.673 0.0943 .
## syllable2:lextale_score 0.9517 0.5350 1.779 0.0753 .
## syllable3:lextale_score 0.9875 0.4674 2.113 0.0346 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) syllb2 syllb3 dpr___ lxtl_s
## syllable2 -0.533
## syllable3 -0.610 0.343
## dprm_bttr__ 0.134 -0.076 -0.086
## lextale_scr -0.940 0.529 0.606 0.046
## syllbl2:dprm_bttry_p_d -0.072 0.150 0.046 -0.539 -0.024
## syllbl3:dprm_bttry_p_d -0.083 0.047 0.147 -0.624 -0.029
## syllbl1:___ -0.038 0.021 0.024 -0.198 0.117
## syllbl2:dprm_bttry_p_p 0.000 -0.059 0.001 0.000 0.000
## syllbl3:dprm_bttry_p_p 0.000 0.000 -0.057 0.000 0.000
## syllbl2:lx_ 0.514 -0.965 -0.331 -0.025 -0.546
## syllbl3:lx_ 0.589 -0.331 -0.966 -0.029 -0.627
## syllbl2:dprm_bttry_p_d syllbl3:dprm_bttry_p_d s1:___
## syllable2
## syllable3
## dprm_bttr__
## lextale_scr
## syllbl2:dprm_bttry_p_d
## syllbl3:dprm_bttry_p_d 0.337
## syllbl1:___ 0.107 0.124
## syllbl2:dprm_bttry_p_p -0.200 0.000 0.000
## syllbl3:dprm_bttry_p_p 0.000 -0.180 0.000
## syllbl2:lx_ 0.030 0.016 -0.064
## syllbl3:lx_ 0.016 0.030 -0.073
## syllbl2:dprm_bttry_p_p syllbl3:dprm_bttry_p_p syl2:_
## syllable2
## syllable3
## dprm_bttr__
## lextale_scr
## syllbl2:dprm_bttry_p_d
## syllbl3:dprm_bttry_p_d
## syllbl1:___
## syllbl2:dprm_bttry_p_p
## syllbl3:dprm_bttry_p_p 0.000
## syllbl2:lx_ 0.132 0.000
## syllbl3:lx_ -0.001 0.128 0.342
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00211687 (tol = 0.002, component 1)
summary_penultimate$summary
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## values ~ lextale_score + italian_lextale_score + syllable:dprime_battery_up_dur +
## syllable:lextale_score + syllable:autism_score + syllable:italian_lextale_score +
## (1 | word_play)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 10866.7 10965.6 -5419.4 10838.7 8635
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3423 -0.7284 -0.5918 1.1495 2.3940
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.1483 0.3851
## Number of obs: 8649, groups: word_play, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.02711 1.22079 -4.118 3.82e-05 ***
## lextale_score -0.04220 0.38805 -0.109 0.913405
## italian_lextale_score 4.23226 1.27848 3.310 0.000932 ***
## syllable1:dprime_battery_up_dur -0.31647 0.46465 -0.681 0.495811
## syllable2:dprime_battery_up_dur -0.08322 0.38092 -0.218 0.827054
## syllable3:dprime_battery_up_dur -0.80535 0.43533 -1.850 0.064314 .
## lextale_score:syllable2 0.83900 0.50270 1.669 0.095119 .
## lextale_score:syllable3 0.78831 0.53056 1.486 0.137334
## syllable1:autism_score -0.22055 0.31420 -0.702 0.482711
## syllable2:autism_score 0.45215 0.25938 1.743 0.081296 .
## syllable3:autism_score 0.47694 0.29560 1.613 0.106645
## italian_lextale_score:syllable2 -0.52111 0.44830 -1.162 0.245072
## italian_lextale_score:syllable3 -0.47563 0.47568 -1.000 0.317364
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0081802 (tol = 0.002, component 1)
summary_anti_penultimate$summary
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## values ~ syllable + dprime_battery_up_dur + lextale_score + syllable:dprime_battery_up_dur +
## syllable:dprime_battery_up_ptich + syllable:lextale_score +
## (1 | word_play)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 10870.6 10962.7 -5422.3 10844.6 8806
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.3772 -0.7079 -0.5853 1.1847 2.6029
##
## Random effects:
## Groups Name Variance Std.Dev.
## word_play (Intercept) 0.07606 0.2758
## Number of obs: 8819, groups: word_play, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5634 0.2436 -6.419 1.37e-10 ***
## syllable2 -0.4597 0.4323 -1.064 0.2875
## syllable3 -0.5004 0.3779 -1.324 0.1854
## dprime_battery_up_dur -1.7392 0.3613 -4.814 1.48e-06 ***
## lextale_score 0.6349 0.2927 2.169 0.0301 *
## syllable2:dprime_battery_up_dur -0.7630 0.6721 -1.135 0.2562
## syllable3:dprime_battery_up_dur -0.4225 0.5793 -0.729 0.4658
## syllable1:dprime_battery_up_ptich 0.8879 0.2138 4.154 3.27e-05 ***
## syllable2:dprime_battery_up_ptich 0.7265 0.3539 2.053 0.0401 *
## syllable3:dprime_battery_up_ptich 0.4800 0.2869 1.673 0.0943 .
## syllable2:lextale_score 0.9517 0.5350 1.779 0.0753 .
## syllable3:lextale_score 0.9875 0.4674 2.113 0.0346 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) syllb2 syllb3 dpr___ lxtl_s
## syllable2 -0.533
## syllable3 -0.610 0.343
## dprm_bttr__ 0.134 -0.076 -0.086
## lextale_scr -0.940 0.529 0.606 0.046
## syllbl2:dprm_bttry_p_d -0.072 0.150 0.046 -0.539 -0.024
## syllbl3:dprm_bttry_p_d -0.083 0.047 0.147 -0.624 -0.029
## syllbl1:___ -0.038 0.021 0.024 -0.198 0.117
## syllbl2:dprm_bttry_p_p 0.000 -0.059 0.001 0.000 0.000
## syllbl3:dprm_bttry_p_p 0.000 0.000 -0.057 0.000 0.000
## syllbl2:lx_ 0.514 -0.965 -0.331 -0.025 -0.546
## syllbl3:lx_ 0.589 -0.331 -0.966 -0.029 -0.627
## syllbl2:dprm_bttry_p_d syllbl3:dprm_bttry_p_d s1:___
## syllable2
## syllable3
## dprm_bttr__
## lextale_scr
## syllbl2:dprm_bttry_p_d
## syllbl3:dprm_bttry_p_d 0.337
## syllbl1:___ 0.107 0.124
## syllbl2:dprm_bttry_p_p -0.200 0.000 0.000
## syllbl3:dprm_bttry_p_p 0.000 -0.180 0.000
## syllbl2:lx_ 0.030 0.016 -0.064
## syllbl3:lx_ 0.016 0.030 -0.073
## syllbl2:dprm_bttry_p_p syllbl3:dprm_bttry_p_p syl2:_
## syllable2
## syllable3
## dprm_bttr__
## lextale_scr
## syllbl2:dprm_bttry_p_d
## syllbl3:dprm_bttry_p_d
## syllbl1:___
## syllbl2:dprm_bttry_p_p
## syllbl3:dprm_bttry_p_p 0.000
## syllbl2:lx_ 0.132 0.000
## syllbl3:lx_ -0.001 0.128 0.342
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00211687 (tol = 0.002, component 1)
summary_penultimate$plot
summary_anti_penultimate$plot
library(conflicted)
ets_exp1_all_agg_part_wider<-comp_tar_longer%>%
pivot_wider(names_from = names,values_from = values)%>%
filter(syllable!=0)%>%
group_by(participant.private.id,stress_play,syllable)%>%
summarise(mean_target_looks=mean(target_looks),
mean_competitor_looks=mean(competitor_looks),
mean_dist1_looks=mean(dist1_looks),
mean_dist2_looks=mean(dist2_looks))%>%
mutate(proportion_o_looks=mean_target_looks-mean_competitor_looks)%>%
left_join(battery_agg_wider_simp)%>%
left_join(digit_cleaned_simp)%>%
left_join(lextale_agg_cleaned_simp)%>%
left_join(italian_lextale_agg_cleaned_simp)%>%
left_join(autism_cleaned)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=dprime_battery_up_dur,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=dprime_battery_up_formants,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=dprime_battery_up_ptich,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=dprime_battery_up_risetime,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=autism_score,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=lextale_score,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=italian_lextale_score,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(x=mean_working_memory,y=proportion_o_looks))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(y=proportion_o_looks,x=dprime_battery_up_ptich,size=dprime_battery_up_dur,alpha=.2))+
geom_point()+
geom_smooth()+
facet_grid(syllable~stress_play)
ets_exp1_all_agg_part_wider%>%
ggplot(aes(y=mean_target_looks,x=autism_score,size=italian_lextale_score,color=syllable,alpha=.2))+
geom_point()+
geom_smooth()+
facet_grid(.~stress_play)
write.csv(ets_exp1_all_agg_part_wider, "../../ppcc_iflu_data/ets_exp1_all_agg_part_wider.csv")
fit_models_acoustic_id <- function(data) {
# Filter the data
data_filtered <- na.omit(data)
# Create the design matrix and response vector
x <- model.matrix(target_looks ~ (dprime_battery_up_dur +
dprime_battery_up_ptich +
dprime_battery_up_risetime +
dprime_battery_up_formants +
lextale_score + autism_score +
italian_lextale_score +
lextale_score)*
(mean_pitch_scaled+
duration_scaled+
mean_amplitude_scaled+
mean_tilt_scaled), data = data_filtered)[, -1]
y <- data_filtered$target_looks
# Set up lambda values for LASSO regression
lambda_search_space <- 10^seq(10, -2, length = 100)
# Fit the LASSO regression model
lasso_model <- glmnet(x, y, alpha = 1, lambda = lambda_search_space, family = "binomial")
# Cross-validation to find the best lambda
cv <- cv.glmnet(x, y, alpha = 1, family = "binomial")
best_lambda <- cv$lambda.min
# Extract coefficients for the best lambda
coef_lasso_best <- coef(lasso_model, s = best_lambda)
# Convert coefficients to data frame for visualization
coef_df <- as.data.frame(as.matrix(coef_lasso_best))
coef_df$Predictor <- rownames(coef_df)
rownames(coef_df) <- NULL
# Visualize the coefficients
coef_plot <- ggplot(coef_df, aes(x = Predictor, y = s1)) +
geom_bar(stat = "identity") +
coord_flip() +
labs(title = paste("LASSO Regression Coefficients at Best Lambda (", round(best_lambda, 4), ")", sep = ""),
x = "Predictor",
y = "Coefficient Value") +
theme_minimal()
print(coef_plot)
# Select significant variables based on LASSO coefficients
significant_vars <- coef_df %>%
filter(s1 != 0) %>%
pull(Predictor)
# Clean up the variable names by replacing syllable1, syllable2, and syllable3 with syllable
significant_vars <- gsub("syllable[123]", "syllable", significant_vars)
# Remove duplicate variables
significant_vars <- unique(significant_vars)
# Prepare formula for GLMM using significant variables
significant_vars <- significant_vars[significant_vars != "(Intercept)"] # Remove intercept if present
formula_str <- paste("target_looks ~", paste(significant_vars, collapse = " + "), "+ (1|word)")
# Convert the string to a formula object
formula <- as.formula(formula_str)
formula
# Fit GLMM with selected variables
model_selected <- glmer(formula, data = data_filtered, family = "binomial")
# Print the summary of the GLMM
print(summary(model_selected))
# Return the summary and the plot
list(summary = summary(model_selected), plot = coef_plot)
}
# Prepare datasets
ap_data_1_id <- ap_data_1 %>%
left_join(battery_agg_wider_simp) %>%
left_join(digit_cleaned_simp) %>%
left_join(lextale_agg_cleaned_simp) %>%
left_join(italian_lextale_agg_cleaned_simp) %>%
left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
ap_data_3_id <- ap_data_3 %>%
left_join(battery_agg_wider_simp) %>%
left_join(digit_cleaned_simp) %>%
left_join(lextale_agg_cleaned_simp) %>%
left_join(italian_lextale_agg_cleaned_simp) %>%
left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
pp_data_1_id <- pp_data_1 %>%
left_join(battery_agg_wider_simp) %>%
left_join(digit_cleaned_simp) %>%
left_join(lextale_agg_cleaned_simp) %>%
left_join(italian_lextale_agg_cleaned_simp) %>%
left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
pp_data_3_id <- pp_data_3 %>%
left_join(battery_agg_wider_simp) %>%
left_join(digit_cleaned_simp) %>%
left_join(lextale_agg_cleaned_simp) %>%
left_join(italian_lextale_agg_cleaned_simp) %>%
left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
result_ap_data_1 <- fit_models_acoustic_id(ap_data_1_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.011968 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + dprime_battery_up_ptich +
## lextale_score + italian_lextale_score + dprime_battery_up_dur:duration_scaled +
## dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +
## dprime_battery_up_risetime:mean_pitch_scaled + dprime_battery_up_risetime:mean_tilt_scaled +
## autism_score:mean_tilt_scaled + (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 5082.1 5158.6 -2529.0 5058.1 4345
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1859 -0.6473 -0.5480 1.2329 2.9375
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.08389 0.2896
## Number of obs: 4357, groups: word, 23
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -4.7168 1.7104 -2.758
## dprime_battery_up_dur -1.0733 0.4461 -2.406
## dprime_battery_up_ptich 0.9954 0.2214 4.496
## lextale_score 0.6437 0.3072 2.095
## italian_lextale_score 3.1846 1.7753 1.794
## dprime_battery_up_dur:duration_scaled -0.8175 0.4079 -2.004
## dprime_battery_up_dur:mean_tilt_scaled -0.1021 0.3737 -0.273
## dprime_battery_up_ptich:mean_pitch_scaled 0.2781 0.2303 1.207
## mean_pitch_scaled:dprime_battery_up_risetime 0.2858 0.3211 0.890
## mean_tilt_scaled:dprime_battery_up_risetime -0.6141 0.2922 -2.101
## mean_tilt_scaled:autism_score 0.2614 0.1680 1.556
## Pr(>|z|)
## (Intercept) 0.00582 **
## dprime_battery_up_dur 0.01614 *
## dprime_battery_up_ptich 6.93e-06 ***
## lextale_score 0.03614 *
## italian_lextale_score 0.07284 .
## dprime_battery_up_dur:duration_scaled 0.04507 *
## dprime_battery_up_dur:mean_tilt_scaled 0.78463
## dprime_battery_up_ptich:mean_pitch_scaled 0.22728
## mean_pitch_scaled:dprime_battery_up_risetime 0.37338
## mean_tilt_scaled:dprime_battery_up_risetime 0.03561 *
## mean_tilt_scaled:autism_score 0.11969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dprm_bttry_p_d dprm_bttry_p_p lxtl_s itln__ dp___:_
## dprm_bttry_p_d 0.059
## dprm_bttry_p_p -0.023 -0.156
## lextale_scr 0.057 0.034 0.114
## itln_lxtl_s -0.989 -0.042 0.017 -0.195
## dprm_bt__:_ -0.001 -0.515 -0.012 0.003 0.001
## dprm_bttry_p_d:__ -0.023 -0.140 0.009 0.006 0.016 -0.172
## dprm_bttry_p_p:__ 0.004 -0.018 -0.066 -0.006 -0.004 0.007
## mn_ptc_:___ -0.024 -0.012 -0.008 -0.019 0.026 0.001
## mn_tlt_:___ -0.045 0.007 -0.110 -0.054 0.053 0.017
## mn_tlt_sc:_ -0.075 -0.063 -0.023 -0.004 0.063 -0.037
## dprm_bttry_p_d:__ dprm_bttry_p_p:__ mn_p_:___ mn_t_:___
## dprm_bttry_p_d
## dprm_bttry_p_p
## lextale_scr
## itln_lxtl_s
## dprm_bt__:_
## dprm_bttry_p_d:__
## dprm_bttry_p_p:__ 0.047
## mn_ptc_:___ -0.019 -0.436
## mn_tlt_:___ -0.246 -0.014 0.191
## mn_tlt_sc:_ 0.463 0.003 0.015 0.021
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.011968 (tol = 0.002, component 1)
result_pp_data_1 <- fit_models_acoustic_id(pp_data_1_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00760328 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: target_looks ~ dprime_battery_up_risetime + mean_pitch_scaled +
## duration_scaled + mean_amplitude_scaled + dprime_battery_up_dur:mean_amplitude_scaled +
## dprime_battery_up_ptich:mean_pitch_scaled + dprime_battery_up_ptich:duration_scaled +
## lextale_score:mean_tilt_scaled + autism_score:mean_tilt_scaled +
## italian_lextale_score:mean_amplitude_scaled + (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 3089.8 3160.3 -1532.9 3065.8 2622
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.9948 -0.6322 -0.5346 1.1321 2.3729
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.1263 0.3554
## Number of obs: 2634, groups: word, 24
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -1.04585 0.29216 -3.580
## dprime_battery_up_risetime -0.39073 0.40509 -0.965
## mean_pitch_scaled -0.06378 0.09418 -0.677
## duration_scaled -0.11509 0.27094 -0.425
## mean_amplitude_scaled 0.58582 1.80642 0.324
## mean_amplitude_scaled:dprime_battery_up_dur 0.25248 0.38781 0.651
## mean_pitch_scaled:dprime_battery_up_ptich 0.11475 0.27933 0.411
## duration_scaled:dprime_battery_up_ptich 0.19011 0.30430 0.625
## lextale_score:mean_tilt_scaled 0.18939 0.19524 0.970
## mean_tilt_scaled:autism_score 0.18958 0.30221 0.627
## mean_amplitude_scaled:italian_lextale_score -0.66010 1.84596 -0.358
## Pr(>|z|)
## (Intercept) 0.000344 ***
## dprime_battery_up_risetime 0.334766
## mean_pitch_scaled 0.498288
## duration_scaled 0.670999
## mean_amplitude_scaled 0.745712
## mean_amplitude_scaled:dprime_battery_up_dur 0.515024
## mean_pitch_scaled:dprime_battery_up_ptich 0.681214
## duration_scaled:dprime_battery_up_ptich 0.532131
## lextale_score:mean_tilt_scaled 0.332025
## mean_tilt_scaled:autism_score 0.530449
## mean_amplitude_scaled:italian_lextale_score 0.720650
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dpr___ mn_pt_ drtn_s mn_mp_ mn_m_:___ mn_p_:___ d_:___
## dprm_bttr__ 0.114
## mn_ptch_scl -0.093 -0.003
## duratn_scld 0.925 0.052 -0.192
## mn_mpltd_sc -0.014 -0.028 0.004 -0.009
## mn_mpl_:___ 0.001 -0.010 0.014 0.002 0.046
## mn_ptc_:___ -0.004 -0.009 0.417 -0.037 0.013 0.043
## drtn_sc:___ 0.040 0.322 -0.090 0.160 0.003 0.001 -0.223
## lxtl_scr:__ 0.221 -0.005 0.085 0.130 0.084 -0.027 -0.049 0.068
## mn_tlt_sc:_ 0.056 0.012 0.053 0.021 -0.104 0.041 0.060 -0.073
## mn_mplt_:__ 0.001 0.028 -0.001 0.000 -0.999 -0.019 -0.012 -0.003
## lx_:__ mn__:_
## dprm_bttr__
## mn_ptch_scl
## duratn_scld
## mn_mpltd_sc
## mn_mpl_:___
## mn_ptc_:___
## drtn_sc:___
## lxtl_scr:__
## mn_tlt_sc:_ -0.700
## mn_mplt_:__ -0.096 0.103
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00760328 (tol = 0.002, component 1)
result_ap_data_3 <- fit_models_acoustic_id(ap_data_3_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0039877 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## target_looks ~ dprime_battery_up_dur + lextale_score + dprime_battery_up_dur:mean_pitch_scaled +
## dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +
## dprime_battery_up_ptich:mean_tilt_scaled + dprime_battery_up_risetime:mean_pitch_scaled +
## lextale_score:mean_pitch_scaled + (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 3195.0 3253.1 -1587.5 3175.0 2465
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4416 -0.7549 -0.6334 1.1529 1.9937
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.03065 0.1751
## Number of obs: 2475, groups: word, 23
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -1.9924 0.3051 -6.531
## dprime_battery_up_dur -2.3587 1.4371 -1.641
## lextale_score 0.6420 0.4704 1.365
## dprime_battery_up_dur:mean_pitch_scaled -0.2635 1.6336 -0.161
## dprime_battery_up_dur:mean_tilt_scaled 0.5143 0.5085 1.011
## mean_pitch_scaled:dprime_battery_up_ptich -0.7025 0.3616 -1.942
## mean_tilt_scaled:dprime_battery_up_ptich 0.4910 0.3569 1.376
## mean_pitch_scaled:dprime_battery_up_risetime -0.7644 0.4579 -1.669
## lextale_score:mean_pitch_scaled -1.1386 0.3560 -3.199
## Pr(>|z|)
## (Intercept) 6.52e-11 ***
## dprime_battery_up_dur 0.10074
## lextale_score 0.17236
## dprime_battery_up_dur:mean_pitch_scaled 0.87183
## dprime_battery_up_dur:mean_tilt_scaled 0.31184
## mean_pitch_scaled:dprime_battery_up_ptich 0.05208 .
## mean_tilt_scaled:dprime_battery_up_ptich 0.16885
## mean_pitch_scaled:dprime_battery_up_risetime 0.09509 .
## lextale_score:mean_pitch_scaled 0.00138 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dpr___ lxtl_s dprm_bttry_p_dr:mn_p_
## dprm_bttr__ 0.046
## lextale_scr -0.756 0.417
## dprm_bttry_p_dr:mn_p_ 0.003 0.942 0.427
## dprm_bttry_p_dr:mn_t_ -0.005 0.327 0.054 0.263
## mn_ptch_scld:dprm_bttry_p_p 0.105 0.017 -0.065 -0.043
## mn_tlt_:___ 0.006 -0.102 -0.106 -0.069
## mn_ptch_scld:dprm_bttry_p_r -0.119 -0.003 0.091 -0.055
## lxtl_scr:__ 0.009 0.648 0.608 0.678
## dprm_bttry_p_dr:mn_t_ mn_ptch_scld:dprm_bttry_p_p
## dprm_bttr__
## lextale_scr
## dprm_bttry_p_dr:mn_p_
## dprm_bttry_p_dr:mn_t_
## mn_ptch_scld:dprm_bttry_p_p 0.080
## mn_tlt_:___ -0.517 -0.163
## mn_ptch_scld:dprm_bttry_p_r 0.011 -0.325
## lxtl_scr:__ 0.069 0.116
## mn_t_:___ mn_ptch_scld:dprm_bttry_p_r
## dprm_bttr__
## lextale_scr
## dprm_bttry_p_dr:mn_p_
## dprm_bttry_p_dr:mn_t_
## mn_ptch_scld:dprm_bttry_p_p
## mn_tlt_:___
## mn_ptch_scld:dprm_bttry_p_r 0.000
## lxtl_scr:__ -0.143 0.022
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0039877 (tol = 0.002, component 1)
result_pp_data_3 <- fit_models_acoustic_id(pp_data_3_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0557315 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + lextale_score + autism_score +
## italian_lextale_score + dprime_battery_up_dur:mean_pitch_scaled +
## dprime_battery_up_formants:duration_scaled + autism_score:mean_pitch_scaled +
## (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 3392.5 3445.1 -1687.2 3374.5 2552
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.2893 -0.7930 -0.6725 1.1483 1.7532
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.08264 0.2875
## Number of obs: 2561, groups: word, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.6840 2.2153 -3.469 0.000523
## dprime_battery_up_dur -0.8787 0.6379 -1.377 0.168388
## lextale_score 0.7306 0.3624 2.016 0.043791
## autism_score 0.3281 0.3595 0.913 0.361505
## italian_lextale_score 6.5459 2.2736 2.879 0.003989
## dprime_battery_up_dur:mean_pitch_scaled -0.2193 0.5289 -0.415 0.678439
## dprime_battery_up_formants:duration_scaled 0.6316 0.3416 1.849 0.064511
## autism_score:mean_pitch_scaled 0.1938 0.2296 0.844 0.398641
##
## (Intercept) ***
## dprime_battery_up_dur
## lextale_score *
## autism_score
## italian_lextale_score **
## dprime_battery_up_dur:mean_pitch_scaled
## dprime_battery_up_formants:duration_scaled .
## autism_score:mean_pitch_scaled
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dpr___ lxtl_s atsm_s itln__ d___:__ dp___:_
## dprm_bttr__ 0.063
## lextale_scr 0.062 0.034
## autism_scor -0.243 0.352 0.016
## itln_lxtl_s -0.989 -0.064 -0.190 0.193
## dprm_b__:__ -0.020 -0.701 -0.006 -0.329 0.023
## dprm_bt__:_ -0.195 -0.305 0.065 -0.099 0.207 0.125
## atsm_scr:__ 0.003 -0.394 -0.010 -0.513 -0.003 0.589 -0.038
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0557315 (tol = 0.002, component 1)
# Print summaries and display plots
print(result_ap_data_1$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + dprime_battery_up_ptich +
## lextale_score + italian_lextale_score + dprime_battery_up_dur:duration_scaled +
## dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +
## dprime_battery_up_risetime:mean_pitch_scaled + dprime_battery_up_risetime:mean_tilt_scaled +
## autism_score:mean_tilt_scaled + (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 5082.1 5158.6 -2529.0 5058.1 4345
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1859 -0.6473 -0.5480 1.2329 2.9375
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.08389 0.2896
## Number of obs: 4357, groups: word, 23
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -4.7168 1.7104 -2.758
## dprime_battery_up_dur -1.0733 0.4461 -2.406
## dprime_battery_up_ptich 0.9954 0.2214 4.496
## lextale_score 0.6437 0.3072 2.095
## italian_lextale_score 3.1846 1.7753 1.794
## dprime_battery_up_dur:duration_scaled -0.8175 0.4079 -2.004
## dprime_battery_up_dur:mean_tilt_scaled -0.1021 0.3737 -0.273
## dprime_battery_up_ptich:mean_pitch_scaled 0.2781 0.2303 1.207
## mean_pitch_scaled:dprime_battery_up_risetime 0.2858 0.3211 0.890
## mean_tilt_scaled:dprime_battery_up_risetime -0.6141 0.2922 -2.101
## mean_tilt_scaled:autism_score 0.2614 0.1680 1.556
## Pr(>|z|)
## (Intercept) 0.00582 **
## dprime_battery_up_dur 0.01614 *
## dprime_battery_up_ptich 6.93e-06 ***
## lextale_score 0.03614 *
## italian_lextale_score 0.07284 .
## dprime_battery_up_dur:duration_scaled 0.04507 *
## dprime_battery_up_dur:mean_tilt_scaled 0.78463
## dprime_battery_up_ptich:mean_pitch_scaled 0.22728
## mean_pitch_scaled:dprime_battery_up_risetime 0.37338
## mean_tilt_scaled:dprime_battery_up_risetime 0.03561 *
## mean_tilt_scaled:autism_score 0.11969
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dprm_bttry_p_d dprm_bttry_p_p lxtl_s itln__ dp___:_
## dprm_bttry_p_d 0.059
## dprm_bttry_p_p -0.023 -0.156
## lextale_scr 0.057 0.034 0.114
## itln_lxtl_s -0.989 -0.042 0.017 -0.195
## dprm_bt__:_ -0.001 -0.515 -0.012 0.003 0.001
## dprm_bttry_p_d:__ -0.023 -0.140 0.009 0.006 0.016 -0.172
## dprm_bttry_p_p:__ 0.004 -0.018 -0.066 -0.006 -0.004 0.007
## mn_ptc_:___ -0.024 -0.012 -0.008 -0.019 0.026 0.001
## mn_tlt_:___ -0.045 0.007 -0.110 -0.054 0.053 0.017
## mn_tlt_sc:_ -0.075 -0.063 -0.023 -0.004 0.063 -0.037
## dprm_bttry_p_d:__ dprm_bttry_p_p:__ mn_p_:___ mn_t_:___
## dprm_bttry_p_d
## dprm_bttry_p_p
## lextale_scr
## itln_lxtl_s
## dprm_bt__:_
## dprm_bttry_p_d:__
## dprm_bttry_p_p:__ 0.047
## mn_ptc_:___ -0.019 -0.436
## mn_tlt_:___ -0.246 -0.014 0.191
## mn_tlt_sc:_ 0.463 0.003 0.015 0.021
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.011968 (tol = 0.002, component 1)
print(result_pp_data_1$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: target_looks ~ dprime_battery_up_risetime + mean_pitch_scaled +
## duration_scaled + mean_amplitude_scaled + dprime_battery_up_dur:mean_amplitude_scaled +
## dprime_battery_up_ptich:mean_pitch_scaled + dprime_battery_up_ptich:duration_scaled +
## lextale_score:mean_tilt_scaled + autism_score:mean_tilt_scaled +
## italian_lextale_score:mean_amplitude_scaled + (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 3089.8 3160.3 -1532.9 3065.8 2622
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.9948 -0.6322 -0.5346 1.1321 2.3729
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.1263 0.3554
## Number of obs: 2634, groups: word, 24
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -1.04585 0.29216 -3.580
## dprime_battery_up_risetime -0.39073 0.40509 -0.965
## mean_pitch_scaled -0.06378 0.09418 -0.677
## duration_scaled -0.11509 0.27094 -0.425
## mean_amplitude_scaled 0.58582 1.80642 0.324
## mean_amplitude_scaled:dprime_battery_up_dur 0.25248 0.38781 0.651
## mean_pitch_scaled:dprime_battery_up_ptich 0.11475 0.27933 0.411
## duration_scaled:dprime_battery_up_ptich 0.19011 0.30430 0.625
## lextale_score:mean_tilt_scaled 0.18939 0.19524 0.970
## mean_tilt_scaled:autism_score 0.18958 0.30221 0.627
## mean_amplitude_scaled:italian_lextale_score -0.66010 1.84596 -0.358
## Pr(>|z|)
## (Intercept) 0.000344 ***
## dprime_battery_up_risetime 0.334766
## mean_pitch_scaled 0.498288
## duration_scaled 0.670999
## mean_amplitude_scaled 0.745712
## mean_amplitude_scaled:dprime_battery_up_dur 0.515024
## mean_pitch_scaled:dprime_battery_up_ptich 0.681214
## duration_scaled:dprime_battery_up_ptich 0.532131
## lextale_score:mean_tilt_scaled 0.332025
## mean_tilt_scaled:autism_score 0.530449
## mean_amplitude_scaled:italian_lextale_score 0.720650
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dpr___ mn_pt_ drtn_s mn_mp_ mn_m_:___ mn_p_:___ d_:___
## dprm_bttr__ 0.114
## mn_ptch_scl -0.093 -0.003
## duratn_scld 0.925 0.052 -0.192
## mn_mpltd_sc -0.014 -0.028 0.004 -0.009
## mn_mpl_:___ 0.001 -0.010 0.014 0.002 0.046
## mn_ptc_:___ -0.004 -0.009 0.417 -0.037 0.013 0.043
## drtn_sc:___ 0.040 0.322 -0.090 0.160 0.003 0.001 -0.223
## lxtl_scr:__ 0.221 -0.005 0.085 0.130 0.084 -0.027 -0.049 0.068
## mn_tlt_sc:_ 0.056 0.012 0.053 0.021 -0.104 0.041 0.060 -0.073
## mn_mplt_:__ 0.001 0.028 -0.001 0.000 -0.999 -0.019 -0.012 -0.003
## lx_:__ mn__:_
## dprm_bttr__
## mn_ptch_scl
## duratn_scld
## mn_mpltd_sc
## mn_mpl_:___
## mn_ptc_:___
## drtn_sc:___
## lxtl_scr:__
## mn_tlt_sc:_ -0.700
## mn_mplt_:__ -0.096 0.103
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00760328 (tol = 0.002, component 1)
print(result_ap_data_3$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## target_looks ~ dprime_battery_up_dur + lextale_score + dprime_battery_up_dur:mean_pitch_scaled +
## dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +
## dprime_battery_up_ptich:mean_tilt_scaled + dprime_battery_up_risetime:mean_pitch_scaled +
## lextale_score:mean_pitch_scaled + (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 3195.0 3253.1 -1587.5 3175.0 2465
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4416 -0.7549 -0.6334 1.1529 1.9937
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.03065 0.1751
## Number of obs: 2475, groups: word, 23
##
## Fixed effects:
## Estimate Std. Error z value
## (Intercept) -1.9924 0.3051 -6.531
## dprime_battery_up_dur -2.3587 1.4371 -1.641
## lextale_score 0.6420 0.4704 1.365
## dprime_battery_up_dur:mean_pitch_scaled -0.2635 1.6336 -0.161
## dprime_battery_up_dur:mean_tilt_scaled 0.5143 0.5085 1.011
## mean_pitch_scaled:dprime_battery_up_ptich -0.7025 0.3616 -1.942
## mean_tilt_scaled:dprime_battery_up_ptich 0.4910 0.3569 1.376
## mean_pitch_scaled:dprime_battery_up_risetime -0.7644 0.4579 -1.669
## lextale_score:mean_pitch_scaled -1.1386 0.3560 -3.199
## Pr(>|z|)
## (Intercept) 6.52e-11 ***
## dprime_battery_up_dur 0.10074
## lextale_score 0.17236
## dprime_battery_up_dur:mean_pitch_scaled 0.87183
## dprime_battery_up_dur:mean_tilt_scaled 0.31184
## mean_pitch_scaled:dprime_battery_up_ptich 0.05208 .
## mean_tilt_scaled:dprime_battery_up_ptich 0.16885
## mean_pitch_scaled:dprime_battery_up_risetime 0.09509 .
## lextale_score:mean_pitch_scaled 0.00138 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dpr___ lxtl_s dprm_bttry_p_dr:mn_p_
## dprm_bttr__ 0.046
## lextale_scr -0.756 0.417
## dprm_bttry_p_dr:mn_p_ 0.003 0.942 0.427
## dprm_bttry_p_dr:mn_t_ -0.005 0.327 0.054 0.263
## mn_ptch_scld:dprm_bttry_p_p 0.105 0.017 -0.065 -0.043
## mn_tlt_:___ 0.006 -0.102 -0.106 -0.069
## mn_ptch_scld:dprm_bttry_p_r -0.119 -0.003 0.091 -0.055
## lxtl_scr:__ 0.009 0.648 0.608 0.678
## dprm_bttry_p_dr:mn_t_ mn_ptch_scld:dprm_bttry_p_p
## dprm_bttr__
## lextale_scr
## dprm_bttry_p_dr:mn_p_
## dprm_bttry_p_dr:mn_t_
## mn_ptch_scld:dprm_bttry_p_p 0.080
## mn_tlt_:___ -0.517 -0.163
## mn_ptch_scld:dprm_bttry_p_r 0.011 -0.325
## lxtl_scr:__ 0.069 0.116
## mn_t_:___ mn_ptch_scld:dprm_bttry_p_r
## dprm_bttr__
## lextale_scr
## dprm_bttry_p_dr:mn_p_
## dprm_bttry_p_dr:mn_t_
## mn_ptch_scld:dprm_bttry_p_p
## mn_tlt_:___
## mn_ptch_scld:dprm_bttry_p_r 0.000
## lxtl_scr:__ -0.143 0.022
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0039877 (tol = 0.002, component 1)
print(result_pp_data_3$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + lextale_score + autism_score +
## italian_lextale_score + dprime_battery_up_dur:mean_pitch_scaled +
## dprime_battery_up_formants:duration_scaled + autism_score:mean_pitch_scaled +
## (1 | word)
## Data: data_filtered
##
## AIC BIC logLik deviance df.resid
## 3392.5 3445.1 -1687.2 3374.5 2552
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.2893 -0.7930 -0.6725 1.1483 1.7532
##
## Random effects:
## Groups Name Variance Std.Dev.
## word (Intercept) 0.08264 0.2875
## Number of obs: 2561, groups: word, 24
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.6840 2.2153 -3.469 0.000523
## dprime_battery_up_dur -0.8787 0.6379 -1.377 0.168388
## lextale_score 0.7306 0.3624 2.016 0.043791
## autism_score 0.3281 0.3595 0.913 0.361505
## italian_lextale_score 6.5459 2.2736 2.879 0.003989
## dprime_battery_up_dur:mean_pitch_scaled -0.2193 0.5289 -0.415 0.678439
## dprime_battery_up_formants:duration_scaled 0.6316 0.3416 1.849 0.064511
## autism_score:mean_pitch_scaled 0.1938 0.2296 0.844 0.398641
##
## (Intercept) ***
## dprime_battery_up_dur
## lextale_score *
## autism_score
## italian_lextale_score **
## dprime_battery_up_dur:mean_pitch_scaled
## dprime_battery_up_formants:duration_scaled .
## autism_score:mean_pitch_scaled
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dpr___ lxtl_s atsm_s itln__ d___:__ dp___:_
## dprm_bttr__ 0.063
## lextale_scr 0.062 0.034
## autism_scor -0.243 0.352 0.016
## itln_lxtl_s -0.989 -0.064 -0.190 0.193
## dprm_b__:__ -0.020 -0.701 -0.006 -0.329 0.023
## dprm_bt__:_ -0.195 -0.305 0.065 -0.099 0.207 0.125
## atsm_scr:__ 0.003 -0.394 -0.010 -0.513 -0.003 0.589 -0.038
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0557315 (tol = 0.002, component 1)
print(result_ap_data_1$plot)
print(result_pp_data_1$plot)
print(result_ap_data_3$plot)
print(result_pp_data_3$plot)
# Install and load ggtext if not already installed
if (!requireNamespace("ggtext", quietly = TRUE)) {
install.packages("ggtext")
}
library(ggtext)
# Define variables
acoustic_vars <- c("mean_pitch_scaled", "mean_amplitude_scaled", "mean_tilt_scaled", "duration_scaled")
id_vars <- c("dprime_battery_up_dur", "dprime_battery_up_ptich", "dprime_battery_up_risetime", "dprime_battery_up_formants", "lextale_score", "autism_score", "italian_lextale_score", "lextale_score")
# Convert the summaries into data frames
result_ap_data_1_coefs <- as.data.frame(result_ap_data_1$summary$coef)
result_ap_data_1_coefs$model <- "ap_data_1"
result_ap_data_3_coefs <- as.data.frame(result_ap_data_3$summary$coef)
result_ap_data_3_coefs$model <- "ap_data_3"
result_pp_data_1_coefs <- as.data.frame(result_pp_data_1$summary$coef)
result_pp_data_1_coefs$model <- "pp_data_1"
result_pp_data_3_coefs <- as.data.frame(result_pp_data_3$summary$coef)
result_pp_data_3_coefs$model <- "pp_data_3"
all_coefs <- rbind(result_ap_data_1_coefs, result_ap_data_3_coefs, result_pp_data_1_coefs, result_pp_data_3_coefs)
# Prepare data for plotting
all_coefs <- all_coefs %>%
rownames_to_column("term") %>%
mutate(conf.low = Estimate - 1.96 * `Std. Error`,
conf.high = Estimate + 1.96 * `Std. Error`,
significance = case_when(
`Pr(>|z|)` < 0.001 ~ "***",
`Pr(>|z|)` < 0.01 ~ "**",
`Pr(>|z|)` < 0.05 ~ "*",
TRUE ~ ""
),
model_type = ifelse(str_detect(model, "1"), "Model 1", "Model 3"),
model_color = ifelse(str_detect(model, "pp"), "PP", "AP"),
term_label = case_when(
term %in% acoustic_vars & term %in% id_vars ~ "both",
term %in% acoustic_vars ~ "acoustic",
term %in% id_vars ~ "id",
TRUE ~ "other"
),
interaction_label = case_when(
str_detect(term, "Intercept") ~ 1,
str_detect(term, ":") ~ 3,
TRUE ~ 2
))
# Update term names for better readability and bold acoustic terms
all_coefs <- all_coefs %>%
mutate(old_term = term) %>%
mutate(term = str_replace_all(term, "1", "")) %>%
mutate(term = str_replace_all(term, "2", "")) %>%
mutate(term = str_replace_all(term, "3", "")) %>%
mutate(term = str_replace_all(term, "_", " ")) %>%
mutate(term = str_replace_all(term, "dprime battery up", "(battery d')")) %>%
mutate(term = str_replace_all(term, "mean", "")) %>%
mutate(term = str_replace_all(term, "italian lextale score", "lextale (Italian)")) %>%
mutate(term = str_replace_all(term, "lextale score", "lextale (English)")) %>%
mutate(term = str_replace_all(term, "scaled", "")) %>%
mutate(term = str_replace_all(term, "dprime", "")) %>%
mutate(term = str_replace_all(term, "Autism score", "score")) %>%
mutate(term = str_replace_all(term, "tilt", "spec tilt")) %>%
mutate(term = str_replace_all(term, "(battery d') dur", "(battery d') duration")) %>%
mutate(term = ifelse(old_term %in% acoustic_vars, paste0("**", term, "**"), term))
# Define the order of terms
ordered_terms <- all_coefs %>%
arrange(interaction_label, term_label, model) %>%
pull(term) %>%
unique()
ordered_terms
## [1] "(Intercept)" "** pitch **"
## [3] "**duration **" "** amplitude **"
## [5] "(battery d') dur" "(battery d') ptich"
## [7] "lextale (English)" "lextale (Italian)"
## [9] "(battery d') risetime" "autism score"
## [11] "(battery d') dur:duration " "(battery d') dur: spec tilt "
## [13] "(battery d') ptich: pitch " " pitch :(battery d') risetime"
## [15] " spec tilt :(battery d') risetime" " spec tilt :autism score"
## [17] "(battery d') dur: pitch " " pitch :(battery d') ptich"
## [19] " spec tilt :(battery d') ptich" "lextale (English): pitch "
## [21] " amplitude :(battery d') dur" "duration :(battery d') ptich"
## [23] "lextale (English): spec tilt " " amplitude :lextale (Italian)"
## [25] "(battery d') formants:duration " "autism score: pitch "
# Set factor levels for term
all_coefs$term <- factor(all_coefs$term, levels = ordered_terms)
# Function to create coefficient plot with confidence intervals and significance markers
create_coef_plot <- function(coef_df, model_label, remove_y_labels = FALSE) {
plot <- ggplot(coef_df, aes(x = Estimate, y = term, color = model_color)) +
geom_point(position = position_dodge(width = 0.5)) +
geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, position = position_dodge(width = 0.5)) +
geom_text(aes(label = significance, x = -11, color = model_color), vjust = 0.5, position = position_dodge(width = 0.5)) +
labs(title = paste("Coefficient Estimates with Confidence Intervals -", model_label),
x = "Estimate",
y = if (remove_y_labels) NULL else "Predictor",
color = "Model Type") +
theme_minimal() +
theme(legend.position = "top",
axis.text.y = if (remove_y_labels) element_blank() else element_markdown(),
axis.ticks.y = if (remove_y_labels) element_blank() else element_line()) +
facet_wrap(~ model_type, scales = "free")
return(plot)
}
# Create and display the plot
plot <- create_coef_plot(all_coefs, "")
plot